diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 485893d58f487a..a2cf682b2532bd 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -200,16 +200,13 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
     if (!this->visit(SubExpr))
       return false;
 
-    if (FromT == ToT) {
-      assert(ToT != PT_IntAP && ToT != PT_IntAPS);
-      return true;
-    }
-
     if (ToT == PT_IntAP)
       return this->emitCastAP(*FromT, Ctx.getBitWidth(CE->getType()), CE);
     if (ToT == PT_IntAPS)
       return this->emitCastAPS(*FromT, Ctx.getBitWidth(CE->getType()), CE);
 
+    if (FromT == ToT)
+      return true;
     return this->emitCast(*FromT, *ToT, CE);
   }
 
diff --git a/clang/test/AST/Interp/intap.cpp b/clang/test/AST/Interp/intap.cpp
index db9f516131af47..45961e6fc74b7a 100644
--- a/clang/test/AST/Interp/intap.cpp
+++ b/clang/test/AST/Interp/intap.cpp
@@ -30,6 +30,17 @@ static_assert(UBitIntZero1 == 0, "");
 constexpr unsigned _BitInt(2) BI1 = 3u;
 static_assert(BI1 == 3, "");
 
+namespace APCast {
+  constexpr _BitInt(10) A = 1;
+  constexpr _BitInt(11) B = A;
+  static_assert(B == 1, "");
+  constexpr _BitInt(16) B2 = A;
+  static_assert(B2 == 1, "");
+  constexpr _BitInt(32) B3 = A;
+  static_assert(B3 == 1, "");
+  constexpr unsigned _BitInt(32) B4 = A;
+  static_assert(B4 == 1, "");
+}
 
 #ifdef __SIZEOF_INT128__
 namespace i128 {
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 30d97be3800c19..92bc7246eca700 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -75,8 +75,6 @@
 std::unique_ptr<mlir::Pass>
 createAlgebraicSimplificationPass(const mlir::GreedyRewriteConfig &config);
 std::unique_ptr<mlir::Pass> createPolymorphicOpConversionPass();
-std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
-createOMPEarlyOutliningPass();
 std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
 createOMPMarkDeclareTargetPass();
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 6d211a535b53f7..6e23b87b7e276e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -318,18 +318,6 @@ def LoopVersioning : Pass<"loop-versioning", "mlir::func::FuncOp"> {
   let dependentDialects = [ "fir::FIROpsDialect" ];
 }
 
-def OMPEarlyOutliningPass
-    : Pass<"omp-early-target-outlining", "mlir::ModuleOp"> {
-  let summary = "Outlines all target ops into separate functions";
-  let description = [{
-    This pass outlines all omp.target operations into individual functions.
-    It is invoked in the front end after the initial FIR has been constructed.
-    This pass is only needed when compiling for the target device to prevent
-    the optimizer to perform transforms across target region boundaries.
- }]; - let constructor = "::fir::createOMPEarlyOutliningPass()"; -} - def OMPMarkDeclareTargetPass : Pass<"omp-mark-declare-target", "mlir::ModuleOp"> { let summary = "Marks all functions called by an OpenMP declare target function as declare target"; diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 2ed716382feb43..c452c023b4a80c 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -274,10 +274,8 @@ inline void createHLFIRToFIRPassPipeline( inline void createOpenMPFIRPassPipeline( mlir::PassManager &pm, bool isTargetDevice) { pm.addPass(fir::createOMPMarkDeclareTargetPass()); - if (isTargetDevice) { - pm.addPass(fir::createOMPEarlyOutliningPass()); + if (isTargetDevice) pm.addPass(fir::createOMPFunctionFilteringPass()); - } } #if !defined(FLANG_EXCLUDE_CODEGEN) diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 57fb9fc432de2f..e8137886d2cf54 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -1645,9 +1645,13 @@ void Fortran::lower::genDeclareSymbol( !sym.detailsIf()) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); const mlir::Location loc = genLocation(converter, sym); + // FIXME: Using the ultimate symbol for translating symbol attributes will + // lead to situations where the VOLATILE/ASYNCHRONOUS attributes are not + // propagated to the hlfir.declare (these attributes can be added when + // using module variables). fir::FortranVariableFlagsAttr attributes = - Fortran::lower::translateSymbolAttributes(builder.getContext(), sym, - extraFlags); + Fortran::lower::translateSymbolAttributes( + builder.getContext(), sym.GetUltimate(), extraFlags); auto name = converter.mangleName(sym); hlfir::EntityWithAttributes declare = hlfir::genDeclare(loc, builder, exv, name, attributes); diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 4a73ee87579c71..087f535ff06408 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -560,11 +560,21 @@ class ClauseProcessor { mlir::Value &result) const; bool processLink(llvm::SmallVectorImpl &result) const; + + // This method is used to process a map clause. + // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to + // store the original type, location and Fortran symbol for the map operands. + // They may be used later on to create the block_arguments for some of the + // target directives that require it. 
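+  // For example, genTargetOp collects all three so that genBodyOfTargetOp can
+  // later add one block argument per map operand:
+  //   cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
+  //                 mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);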
  bool processMap(mlir::Location currentLocation,
                  const llvm::omp::Directive &directive,
                  Fortran::semantics::SemanticsContext &semanticsContext,
                  Fortran::lower::StatementContext &stmtCtx,
-                 llvm::SmallVectorImpl<mlir::Value> &mapOperands) const;
+                 llvm::SmallVectorImpl<mlir::Value> &mapOperands,
+                 llvm::SmallVectorImpl<mlir::Type> *mapSymTypes = nullptr,
+                 llvm::SmallVectorImpl<mlir::Location> *mapSymLocs = nullptr,
+                 llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+                     *mapSymbols = nullptr) const;
  bool processReduction(
      mlir::Location currentLocation,
      llvm::SmallVectorImpl<mlir::Value> &reductionVars,
@@ -1691,31 +1701,29 @@ static mlir::omp::MapInfoOp
 createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
                 mlir::Value baseAddr, std::stringstream &name,
                 mlir::SmallVector<mlir::Value> bounds, uint64_t mapType,
-                mlir::omp::VariableCaptureKind mapCaptureType, bool implicit,
-                mlir::Type retTy) {
-  mlir::Value varPtrPtr;
+                mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
+                bool isVal = false) {
+  mlir::Value val, varPtr, varPtrPtr;
+  mlir::TypeAttr varType;
+
   if (auto boxTy = baseAddr.getType().dyn_cast<fir::BaseBoxType>()) {
     baseAddr = builder.create<fir::BoxAddrOp>(loc, baseAddr);
     retTy = baseAddr.getType();
   }
 
-  mlir::omp::MapInfoOp op =
-      builder.create<mlir::omp::MapInfoOp>(loc, retTy, baseAddr);
-  op.setVarTypeAttr(mlir::TypeAttr::get(
-      llvm::dyn_cast<mlir::omp::PointerLikeType>(retTy).getElementType()));
-  op.setNameAttr(builder.getStringAttr(name.str()));
-  op.setImplicit(implicit);
-  op.setMapType(mapType);
-  op.setMapCaptureType(mapCaptureType);
-
-  unsigned insPos = 1;
-  if (varPtrPtr)
-    op->insertOperands(insPos++, varPtrPtr);
-  if (bounds.size() > 0)
-    op->insertOperands(insPos, bounds);
-  op->setAttr(mlir::omp::MapInfoOp::getOperandSegmentSizeAttr(),
-              builder.getDenseI32ArrayAttr(
-                  {1, varPtrPtr ? 1 : 0, static_cast<int32_t>(bounds.size())}));
+  if (isVal)
+    val = baseAddr;
+  else
+    varPtr = baseAddr;
+
+  if (auto ptrType = llvm::dyn_cast<mlir::omp::PointerLikeType>(retTy))
+    varType = mlir::TypeAttr::get(ptrType.getElementType());
+
+  mlir::omp::MapInfoOp op = builder.create<mlir::omp::MapInfoOp>(
+      loc, retTy, val, varPtr, varType, varPtrPtr, bounds,
+      builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
+      builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
+      builder.getStringAttr(name.str()));
   return op;
 }
 
@@ -1723,7 +1731,11 @@ bool ClauseProcessor::processMap(
     mlir::Location currentLocation, const llvm::omp::Directive &directive,
     Fortran::semantics::SemanticsContext &semanticsContext,
     Fortran::lower::StatementContext &stmtCtx,
-    llvm::SmallVectorImpl<mlir::Value> &mapOperands) const {
+    llvm::SmallVectorImpl<mlir::Value> &mapOperands,
+    llvm::SmallVectorImpl<mlir::Type> *mapSymTypes,
+    llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSymbols)
+    const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   return findRepeatableClause<ClauseTy::Map>(
       [&](const ClauseTy::Map *mapClause,
@@ -1783,13 +1795,20 @@ bool ClauseProcessor::processMap(
          // Explicit map captures are captured ByRef by default,
          // optimisation passes may alter this to ByCopy or other capture
          // types to optimise
-          mapOperands.push_back(createMapInfoOp(
+          mlir::Value mapOp = createMapInfoOp(
              firOpBuilder, clauseLocation, baseAddr, asFortran, bounds,
              static_cast<
                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
                  mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, false,
-              baseAddr.getType()));
+              mlir::omp::VariableCaptureKind::ByRef, baseAddr.getType());
+
+          mapOperands.push_back(mapOp);
+          if (mapSymTypes)
+            mapSymTypes->push_back(baseAddr.getType());
+          if (mapSymLocs)
+            mapSymLocs->push_back(baseAddr.getLoc());
+          if (mapSymbols)
+            mapSymbols->push_back(getOmpObjectSymbol(ompObject));
        }
      });
 }
@@ -2172,7 +2191,7 @@ static void createBodyOfOp(
   }
 }
 
-static void createBodyOfTargetDataOp(
+static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::pft::Evaluation &eval, mlir::omp::DataOp &dataOp,
     const llvm::SmallVector<mlir::Type> &useDeviceTypes,
@@ -2188,18 +2207,17 @@ static void createBodyOfTargetDataOp(
   unsigned argIndex = 0;
   for (const Fortran::semantics::Symbol *sym : useDeviceSymbols) {
     const mlir::BlockArgument &arg = region.front().getArgument(argIndex);
-    mlir::Value val = fir::getBase(arg);
     fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
-    if (auto refType = val.getType().dyn_cast<fir::ReferenceType>()) {
+    if (auto refType = arg.getType().dyn_cast<fir::ReferenceType>()) {
       if (fir::isa_builtin_cptr_type(refType.getElementType())) {
-        converter.bindSymbol(*sym, val);
+        converter.bindSymbol(*sym, arg);
       } else {
         extVal.match(
             [&](const fir::MutableBoxValue &mbv) {
               converter.bindSymbol(
                   *sym,
                   fir::MutableBoxValue(
-                      val, fir::factory::getNonDeferredLenParams(extVal), {}));
+                      arg, fir::factory::getNonDeferredLenParams(extVal), {}));
             },
             [&](const auto &) {
               TODO(converter.getCurrentLocation(),
@@ -2407,8 +2425,8 @@ genDataOp(Fortran::lower::AbstractConverter &converter,
   auto dataOp = converter.getFirOpBuilder().create<mlir::omp::DataOp>(
       currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands,
       deviceAddrOperands, mapOperands);
-  createBodyOfTargetDataOp(converter, eval, dataOp, useDeviceTypes,
-                           useDeviceLocs, useDeviceSymbols, currentLocation);
+  genBodyOfTargetDataOp(converter, eval, dataOp, useDeviceTypes, useDeviceLocs,
+                        useDeviceSymbols, currentLocation);
   return dataOp;
 }
 
@@ -2451,6 +2469,101 @@ genEnterExitDataOp(Fortran::lower::AbstractConverter &converter,
                                          deviceOperand, nowaitAttr, mapOperands);
 }
 
+// This function creates a block for the body of the targetOp's region. It adds
+// all the symbols present in mapSymbols as block arguments to this block.
+static void genBodyOfTargetOp(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::pft::Evaluation &eval, mlir::omp::TargetOp &targetOp,
+    const llvm::SmallVector<mlir::Type> &mapSymTypes,
+    const llvm::SmallVector<mlir::Location> &mapSymLocs,
+    const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
+    const mlir::Location &currentLocation) {
+  assert(mapSymTypes.size() == mapSymLocs.size());
+
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Region &region = targetOp.getRegion();
+
+  firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
+
+  unsigned argIndex = 0;
+  unsigned blockArgsIndex = mapSymbols.size();
+
+  // The block arguments contain the map_operands followed by the bounds in
+  // order. This returns a vector containing the next 'n' block arguments for
+  // the bounds.
+  auto extractBoundArgs = [&](auto n) {
+    llvm::SmallVector<mlir::Value> argExtents;
+    while (n--) {
+      argExtents.push_back(fir::getBase(region.getArgument(blockArgsIndex)));
+      blockArgsIndex++;
+    }
+    return argExtents;
+  };
+
+  // Bind the symbols to their corresponding block arguments.
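+  // Rebinding against the block argument (and pulling extents/lbounds back
+  // out of the trailing bound arguments via extractBoundArgs) keeps the
+  // region isolated: code lowered inside omp.target only refers to its own
+  // block arguments, never to SSA values of the host function.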
+  for (const Fortran::semantics::Symbol *sym : mapSymbols) {
+    const mlir::BlockArgument &arg = region.getArgument(argIndex);
+    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
+    extVal.match(
+        [&](const fir::BoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::BoxValue(arg, extractBoundArgs(v.getLBounds().size()),
+                            v.getExplicitParameters(), v.getExplicitExtents()));
+        },
+        [&](const fir::MutableBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::MutableBoxValue(arg, extractBoundArgs(v.getLBounds().size()),
+                                   v.getMutableProperties()));
+        },
+        [&](const fir::ArrayBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::ArrayBoxValue(arg, extractBoundArgs(v.getExtents().size()),
+                                 extractBoundArgs(v.getLBounds().size()),
+                                 v.getSourceBox()));
+        },
+        [&](const fir::CharArrayBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::CharArrayBoxValue(arg, v.getLen(),
+                                     extractBoundArgs(v.getExtents().size()),
+                                     extractBoundArgs(v.getLBounds().size())));
+        },
+        [&](const fir::CharBoxValue &v) {
+          converter.bindSymbol(*sym, fir::CharBoxValue(arg, v.getLen()));
+        },
+        [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
+        [&](const auto &) {
+          TODO(converter.getCurrentLocation(),
+               "target map clause operand unsupported type");
+        });
+    argIndex++;
+  }
+
+  // Insert a dummy instruction to remember the insertion position. The
+  // marker will be deleted since there are no uses.
+  // In the HLFIR flow there are hlfir.declares inserted above while
+  // setting block arguments.
+  mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
+      targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType());
+
+  // Create blocks for unstructured regions. This has to be done since
+  // blocks are initially allocated with the function as the parent region.
+  if (eval.lowerAsUnstructured()) {
+    Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
+                                            mlir::omp::YieldOp>(
+        firOpBuilder, eval.getNestedEvaluations());
+  }
+
+  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
+
+  // Set the insertion point after the marker.
+  firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
+}
+
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::pft::Evaluation &eval,
@@ -2462,6 +2575,9 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand;
   mlir::UnitAttr nowaitAttr;
   llvm::SmallVector<mlir::Value> mapOperands;
+  llvm::SmallVector<mlir::Type> mapSymTypes;
+  llvm::SmallVector<mlir::Location> mapSymLocs;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> mapSymbols;
 
   ClauseProcessor cp(converter, clauseList);
   cp.processIf(stmtCtx,
@@ -2471,7 +2587,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   cp.processThreadLimit(stmtCtx, threadLimitOperand);
   cp.processNowait(nowaitAttr);
   cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
-                mapOperands);
+                mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
   cp.processTODO(
      currentLocation, llvm::omp::Directive::OMPD_target);
 
-  return genOpWithBody<mlir::omp::TargetOp>(
-      converter, eval, currentLocation, outerCombined, &clauseList,
-      ifClauseOperand, deviceOperand, threadLimitOperand, nowaitAttr,
-      mapOperands);
+  // 5.8.1 Implicit Data-Mapping Attribute Rules
+  // The following code follows the implicit data-mapping rules to map all the
+  // symbols used inside the region that have not been explicitly mapped using
+  // the map clause.
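+  // In short: every symbol referenced in the target region that is not
+  // already present in mapSymbols gets an implicit map_info op. Trivial
+  // scalars and characters are captured ByCopy; other data is captured ByRef
+  // and, unless it is a C_PTR, additionally given the TO and FROM map flags.
+  // Box and array values also get omp.bounds operations generated for them.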
+  auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
+    if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
+      mlir::Value baseOp = converter.getSymbolAddress(sym);
+      if (!baseOp)
+        if (const auto *details = sym.template detailsIf<
+                Fortran::semantics::HostAssocDetails>()) {
+          baseOp = converter.getSymbolAddress(details->symbol());
+          converter.copySymbolBinding(details->symbol(), sym);
+        }
+
+      if (baseOp) {
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream name;
+        fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
+        name << sym.name().ToString();
+
+        mlir::Value baseAddr =
+            getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym,
+                                   converter.getCurrentLocation());
+        if (fir::unwrapRefType(baseAddr.getType()).isa<fir::BaseBoxType>())
+          bounds =
+              Fortran::lower::genBoundsOpsFromBox<mlir::omp::DataBoundsOp,
+                                                  mlir::omp::DataBoundsType>(
+                  converter.getFirOpBuilder(), converter.getCurrentLocation(),
+                  converter, dataExv, baseAddr);
+        if (fir::unwrapRefType(baseAddr.getType()).isa<fir::SequenceType>())
+          bounds = Fortran::lower::genBaseBoundsOps<mlir::omp::DataBoundsOp,
+                                                    mlir::omp::DataBoundsType>(
+              converter.getFirOpBuilder(), converter.getCurrentLocation(),
+              converter, dataExv, baseAddr);
+
+        llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+        mlir::omp::VariableCaptureKind captureKind =
+            mlir::omp::VariableCaptureKind::ByRef;
+        if (auto refType = baseOp.getType().dyn_cast<fir::ReferenceType>()) {
+          auto eleType = refType.getElementType();
+          if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+            captureKind = mlir::omp::VariableCaptureKind::ByCopy;
+          } else if (!fir::isa_builtin_cptr_type(eleType)) {
+            mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+            mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+          }
+        }
+
+        mlir::Value mapOp = createMapInfoOp(
+            converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, name, bounds,
+            static_cast<
+                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                mapFlag),
+            captureKind, baseOp.getType());
+
+        mapOperands.push_back(mapOp);
+        mapSymTypes.push_back(baseOp.getType());
+        mapSymLocs.push_back(baseOp.getLoc());
+        mapSymbols.push_back(&sym);
+      }
+    }
+  };
+  Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
+
+  // Add the bounds and extents for box values to mapOperands
+  auto addMapInfoForBounds = [&](const auto &bounds) {
+    for (auto &val : bounds) {
+      mapSymLocs.push_back(val.getLoc());
+      mapSymTypes.push_back(val.getType());
+
+      llvm::SmallVector<mlir::Value> bounds;
+      std::stringstream name;
+
+      mlir::Value mapOp = createMapInfoOp(
+          converter.getFirOpBuilder(), val.getLoc(), val, name, bounds,
+          static_cast<
+              std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+              llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
+          mlir::omp::VariableCaptureKind::ByCopy, val.getType(), true);
+      mapOperands.push_back(mapOp);
+    }
+  };
+
+  for (const Fortran::semantics::Symbol *sym : mapSymbols) {
+    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
+    extVal.match(
+        [&](const fir::BoxValue &v) { addMapInfoForBounds(v.getLBounds()); },
+        [&](const fir::MutableBoxValue &v) {
+          addMapInfoForBounds(v.getLBounds());
+        },
+        [&](const fir::ArrayBoxValue &v) {
+          addMapInfoForBounds(v.getExtents());
+          addMapInfoForBounds(v.getLBounds());
+        },
+        [&](const fir::CharArrayBoxValue &v) {
+          addMapInfoForBounds(v.getExtents());
+          addMapInfoForBounds(v.getLBounds());
+        },
+        [&](const auto &) {
+          // Nothing to do for non-box values.
+ }); + } + + auto targetOp = converter.getFirOpBuilder().create( + currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, + nowaitAttr, mapOperands); + + genBodyOfTargetOp(converter, eval, targetOp, mapSymTypes, mapSymLocs, + mapSymbols, currentLocation); + + return targetOp; } static mlir::omp::TeamsOp @@ -2868,8 +3092,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && - !std::get_if(&clause.u) && - !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 98314fa7a2087f..03b67104a93b57 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -17,7 +17,6 @@ add_flang_library(FIRTransforms AddDebugFoundation.cpp PolymorphicOpConversion.cpp LoopVersioning.cpp - OMPEarlyOutlining.cpp OMPFunctionFiltering.cpp OMPMarkDeclareTarget.cpp VScaleAttr.cpp diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp deleted file mode 100644 index 92fbdd0bbf5d4a..00000000000000 --- a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp +++ /dev/null @@ -1,303 +0,0 @@ -#include "flang/Optimizer/Dialect/FIRDialect.h" -#include "flang/Optimizer/Dialect/FIROps.h" -#include "flang/Optimizer/Dialect/FIRType.h" -#include "flang/Optimizer/HLFIR/HLFIROps.h" -#include "flang/Optimizer/Support/InternalNames.h" -#include "flang/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" -#include "mlir/IR/BuiltinDialect.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/IRMapping.h" -#include "mlir/IR/Operation.h" -#include "mlir/IR/SymbolTable.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Support/LLVM.h" -#include "mlir/Transforms/RegionUtils.h" -#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" - -namespace fir { -#define GEN_PASS_DEF_OMPEARLYOUTLININGPASS -#include "flang/Optimizer/Transforms/Passes.h.inc" -} // namespace fir - -namespace { -class OMPEarlyOutliningPass - : public fir::impl::OMPEarlyOutliningPassBase { - - std::string getOutlinedFnName(llvm::StringRef parentName, unsigned count) { - return std::string(parentName) + "_omp_outline_" + std::to_string(count); - } - - // Given a value this function will iterate over an operators results - // and return the relevant index for the result the value corresponds to. - // There may be a simpler way to do this however. - static unsigned getResultIndex(mlir::Value value, mlir::Operation *op) { - for (unsigned i = 0; i < op->getNumResults(); ++i) { - if (op->getResult(i) == value) - return i; - } - return 0; - } - - static bool isAddressOfGlobalDeclareTarget(mlir::Value value) { - if (fir::AddrOfOp addressOfOp = - mlir::dyn_cast_if_present(value.getDefiningOp())) - if (fir::GlobalOp gOp = mlir::dyn_cast_if_present( - addressOfOp->getParentOfType().lookupSymbol( - addressOfOp.getSymbol()))) - if (auto declareTargetGlobal = - llvm::dyn_cast( - gOp.getOperation())) - if (declareTargetGlobal.isDeclareTarget()) - return true; - return false; - } - - // Currently used for cloning arguments that are nested. Should be - // extendable where required, perhaps via operation - // specialisation/overloading, if something needs specialised handling. 
- // NOTE: Results in duplication of some values that would otherwise be - // a single SSA value shared between operations, this is tidied up on - // lowering to some extent. - static mlir::Operation * - cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op, - llvm::SetVector &inputs, - mlir::Block::BlockArgListType &newInputs) { - mlir::IRMapping valueMap; - for (mlir::Value opValue : op->getOperands()) { - if (opValue.getDefiningOp()) { - unsigned resIdx = getResultIndex(opValue, opValue.getDefiningOp()); - valueMap.map(opValue, - cloneArgAndChildren(builder, opValue.getDefiningOp(), - inputs, newInputs) - ->getResult(resIdx)); - } else { - for (auto inArg : llvm::zip(inputs, newInputs)) { - if (opValue == std::get<0>(inArg)) - valueMap.map(opValue, std::get<1>(inArg)); - } - } - } - - return builder.clone(*op, valueMap); - } - - static void cloneMapOpVariables(mlir::OpBuilder &builder, - mlir::IRMapping &valueMap, - mlir::IRMapping &mapInfoMap, - llvm::SetVector &inputs, - mlir::Block::BlockArgListType &newInputs, - mlir::Value varPtr) { - if (fir::BoxAddrOp boxAddrOp = - mlir::dyn_cast_if_present(varPtr.getDefiningOp())) { - mlir::Value newV = - cloneArgAndChildren(builder, boxAddrOp, inputs, newInputs) - ->getResult(0); - mapInfoMap.map(varPtr, newV); - valueMap.map(boxAddrOp, newV); - return; - } - - // Clone into the outlined function all hlfir.declare ops that define inputs - // to the target region and set up remapping of its inputs and outputs. - if (auto declareOp = mlir::dyn_cast_if_present( - varPtr.getDefiningOp())) { - auto clone = llvm::cast( - cloneArgAndChildren(builder, declareOp, inputs, newInputs)); - mlir::Value newBase = clone.getBase(); - mlir::Value newOrigBase = clone.getOriginalBase(); - mapInfoMap.map(varPtr, newOrigBase); - valueMap.map(declareOp.getBase(), newBase); - valueMap.map(declareOp.getOriginalBase(), newOrigBase); - return; - } - - if (isAddressOfGlobalDeclareTarget(varPtr)) { - fir::AddrOfOp addrOp = - mlir::dyn_cast(varPtr.getDefiningOp()); - mlir::Value newV = builder.clone(*addrOp)->getResult(0); - mapInfoMap.map(varPtr, newV); - valueMap.map(addrOp, newV); - return; - } - - for (auto inArg : llvm::zip(inputs, newInputs)) { - if (varPtr == std::get<0>(inArg)) - mapInfoMap.map(varPtr, std::get<1>(inArg)); - } - } - - mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder, - mlir::omp::TargetOp &targetOp, - mlir::func::FuncOp &parentFunc, - unsigned count) { - // NOTE: once implicit captures are handled appropriately in the initial - // PFT lowering if it is possible, we can remove the usage of - // getUsedValuesDefinedAbove and instead just iterate over the target op's - // operands (or just the map arguments) and perhaps refactor this function - // a little. - // Collect inputs - llvm::SetVector inputs; - mlir::Region &targetRegion = targetOp.getRegion(); - mlir::getUsedValuesDefinedAbove(targetRegion, inputs); - - // Collect all map info. Even non-used maps must be collected to avoid ICEs. - for (mlir::Value oper : targetOp->getOperands()) { - if (auto mapEntry = - mlir::dyn_cast(oper.getDefiningOp())) { - if (!inputs.contains(mapEntry.getVarPtr())) - inputs.insert(mapEntry.getVarPtr()); - } - } - - // Filter out declare-target and map entries which are specially handled - // at the moment, so we do not wish these to end up as function arguments - // which would just be more noise in the IR. 
- llvm::SmallVector blockArgs; - for (llvm::SetVector::iterator iter = inputs.begin(); iter != inputs.end();) { - if (mlir::isa_and_nonnull(iter->getDefiningOp()) || - isAddressOfGlobalDeclareTarget(*iter)) { - iter = inputs.erase(iter); - } else if (auto declareOp = mlir::dyn_cast_if_present( - iter->getDefiningOp())) { - // Gather hlfir.declare arguments to be added later, after the - // hlfir.declare operation itself has been removed as an input. - blockArgs.push_back(declareOp.getMemref()); - if (mlir::Value shape = declareOp.getShape()) - blockArgs.push_back(shape); - for (mlir::Value typeParam : declareOp.getTypeparams()) - blockArgs.push_back(typeParam); - iter = inputs.erase(iter); - } else { - ++iter; - } - } - - // Add function arguments to the list of inputs if they are used by an - // hlfir.declare operation. - for (mlir::Value arg : blockArgs) { - if (!arg.getDefiningOp() && !inputs.contains(arg)) - inputs.insert(arg); - } - - // Create new function and initialize - mlir::FunctionType funcType = builder.getFunctionType( - mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange()); - std::string parentName(parentFunc.getName()); - std::string funcName = getOutlinedFnName(parentName, count); - mlir::Location loc = targetOp.getLoc(); - mlir::func::FuncOp newFunc = - mlir::func::FuncOp::create(loc, funcName, funcType); - mlir::Block *entryBlock = newFunc.addEntryBlock(); - builder.setInsertionPointToStart(entryBlock); - mlir::Block::BlockArgListType newInputs = entryBlock->getArguments(); - - // Set the declare target information, the outlined function - // is always a host function. - if (auto parentDTOp = llvm::dyn_cast( - parentFunc.getOperation())) - if (auto newDTOp = llvm::dyn_cast( - newFunc.getOperation())) - newDTOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::host, - parentDTOp.getDeclareTargetCaptureClause()); - - // Set the early outlining interface parent name - if (auto earlyOutlineOp = - llvm::dyn_cast( - newFunc.getOperation())) - earlyOutlineOp.setParentName(parentName); - - // The value map for the newly generated Target Operation, we must - // remap most of the input. - mlir::IRMapping valueMap; - - // Special handling for map, declare target and regular map variables - // are handled slightly differently for the moment, declare target has - // its addressOfOp cloned over, whereas we skip it for the regular map - // variables. We need knowledge of which global is linked to the map - // operation for declare target, whereas we aren't bothered for the - // regular map variables for the moment. We could treat both the same, - // however, cloning across the minimum for the moment to avoid - // optimisations breaking segments of the lowering seems prudent as this - // was the original intent of the pass. 
- for (mlir::Value oper : targetOp->getOperands()) { - if (auto mapEntry = - mlir::dyn_cast(oper.getDefiningOp())) { - mlir::IRMapping mapInfoMap; - for (mlir::Value bound : mapEntry.getBounds()) { - if (auto mapEntryBound = mlir::dyn_cast( - bound.getDefiningOp())) { - mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound, - inputs, newInputs) - ->getResult(0)); - } - } - - cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs, - mapEntry.getVarPtr()); - - if (mapEntry.getVarPtrPtr()) - cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs, - mapEntry.getVarPtrPtr()); - - valueMap.map( - mapEntry, - builder.clone(*mapEntry.getOperation(), mapInfoMap)->getResult(0)); - } - } - - for (auto inArg : llvm::zip(inputs, newInputs)) - valueMap.map(std::get<0>(inArg), std::get<1>(inArg)); - - // Clone the target op into the new function - builder.clone(*(targetOp.getOperation()), valueMap); - - // Create return op - builder.create(loc); - - return newFunc; - } - - // Returns true if a target region was found in the function. - bool outlineTargetOps(mlir::OpBuilder &builder, - mlir::func::FuncOp &functionOp, - mlir::ModuleOp &moduleOp, - llvm::SmallVectorImpl &newFuncs) { - unsigned count = 0; - for (auto TargetOp : functionOp.getOps()) { - mlir::func::FuncOp outlinedFunc = - outlineTargetOp(builder, TargetOp, functionOp, count); - newFuncs.push_back(outlinedFunc); - count++; - } - return count > 0; - } - - void runOnOperation() override { - mlir::ModuleOp moduleOp = getOperation(); - mlir::MLIRContext *context = &getContext(); - mlir::OpBuilder builder(context); - llvm::SmallVector newFuncs; - - for (auto functionOp : - llvm::make_early_inc_range(moduleOp.getOps())) { - bool outlined = outlineTargetOps(builder, functionOp, moduleOp, newFuncs); - if (outlined) - functionOp.erase(); - } - - for (auto newFunc : newFuncs) - moduleOp.push_back(newFunc); - } -}; - -} // namespace - -namespace fir { -std::unique_ptr> -createOMPEarlyOutliningPass() { - return std::make_unique(); -} -} // namespace fir diff --git a/flang/test/Driver/OpenMP/map-types-and-sizes.f90 b/flang/test/Driver/OpenMP/map-types-and-sizes.f90 new file mode 100644 index 00000000000000..e4f429e479af15 --- /dev/null +++ b/flang/test/Driver/OpenMP/map-types-and-sizes.f90 @@ -0,0 +1,52 @@ +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s + +!=============================================================================== +! 
Check MapTypes for target implicit captures +!=============================================================================== + +!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] +!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800] +subroutine mapType_scalar + integer :: a + !$omp target + a = 10 + !$omp end target +end subroutine mapType_scalar + +!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 4096] +!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547] +subroutine mapType_array + integer :: a(1024) + !$omp target + a(10) = 20 + !$omp end target +end subroutine mapType_array + +!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8] +!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547] +subroutine mapType_ptr + integer, pointer :: a + !$omp target + a = 10 + !$omp end target +end subroutine mapType_ptr + +!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [2 x i64] [i64 8, i64 4] +!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 544, i64 800] +subroutine mapType_c_ptr + use iso_c_binding, only : c_ptr, c_loc + type(c_ptr) :: a + integer, target :: b + !$omp target + a = c_loc(b) + !$omp end target +end subroutine mapType_c_ptr + +!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 1] +!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 800] +subroutine mapType_char + character :: a + !$omp target + a = 'b' + !$omp end target +end subroutine mapType_char diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index ecfb8e52cae783..6efa4d0a095869 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -433,12 +433,13 @@ func.func @_QPomp_target() { %1 = arith.subi %c512, %c1 : index %2 = omp.bounds lower_bound(%c0 : index) upper_bound(%1 : index) extent(%c512 : index) stride(%c1 : index) start_idx(%c1 : index) %3 = omp.map_info var_ptr(%0 : !fir.ref>, !fir.array<512xi32>) map_clauses(tofrom) capture(ByRef) bounds(%2) -> !fir.ref> {name = "a"} - omp.target thread_limit(%c64_i32 : i32) map_entries(%3 : !fir.ref>) { + omp.target thread_limit(%c64_i32 : i32) map_entries(%3 -> %arg0 : !fir.ref>) { + ^bb0(%arg0: !fir.ref>): %c10_i32 = arith.constant 10 : i32 %c1_i64 = arith.constant 1 : i64 %c1_i64_0 = arith.constant 1 : i64 %4 = arith.subi %c1_i64, %c1_i64_0 : i64 - %5 = fir.coordinate_of %0, %4 : (!fir.ref>, i64) -> !fir.ref + %5 = fir.coordinate_of %arg0, %4 : (!fir.ref>, i64) -> !fir.ref fir.store %c10_i32 to %5 : !fir.ref omp.terminator } @@ -455,12 +456,13 @@ func.func @_QPomp_target() { // CHECK: %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64 // CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64) // CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%2 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"} -// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] : !llvm.ptr) { +// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr) { +// CHECK: ^bb0(%[[ARG_0]]: !llvm.ptr): // CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32 // CHECK: %[[VAL_4:.*]] = 
llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[VAL_7:.*]] = llvm.getelementptr %[[VAL_1]][0, %[[VAL_6]]] : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK: %[[VAL_7:.*]] = llvm.getelementptr %[[ARG_0]][0, %[[VAL_6]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: llvm.store %[[VAL_3]], %[[VAL_7]] : i32, !llvm.ptr // CHECK: omp.terminator // CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/array-bounds.f90 b/flang/test/Lower/OpenMP/FIR/array-bounds.f90 index a17e4b9dfad36d..abef31af22ba66 100644 --- a/flang/test/Lower/OpenMP/FIR/array-bounds.f90 +++ b/flang/test/Lower/OpenMP/FIR/array-bounds.f90 @@ -1,37 +1,22 @@ -!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes HOST -!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes DEVICE +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes=HOST,ALL +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=DEVICE,ALL -!DEVICE-LABEL: func.func @_QPread_write_section_omp_outline_0( -!DEVICE-SAME: %[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref>, %[[ARG2:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPread_write_section"} { -!DEVICE: %[[C1:.*]] = arith.constant 1 : index -!DEVICE: %[[C2:.*]] = arith.constant 4 : index -!DEVICE: %[[C3:.*]] = arith.constant 1 : index -!DEVICE: %[[C4:.*]] = arith.constant 1 : index -!DEVICE: %[[BOUNDS0:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index) -!DEVICE: %[[MAP0:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref> {name = "sp_read(2:5)"} -!DEVICE: %[[C5:.*]] = arith.constant 1 : index -!DEVICE: %[[C6:.*]] = arith.constant 4 : index -!DEVICE: %[[C7:.*]] = arith.constant 1 : index -!DEVICE: %[[C8:.*]] = arith.constant 1 : index -!DEVICE: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C8]] : index) start_idx(%[[C8]] : index) -!DEVICE: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG2]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} -!DEVICE: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref>, !fir.ref>) { - -!HOST-LABEL: func.func @_QPread_write_section() { -!HOST: %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFread_write_sectionEi"} -!HOST: %[[READ:.*]] = fir.address_of(@_QFread_write_sectionEsp_read) : !fir.ref> -!HOST: %[[WRITE:.*]] = fir.address_of(@_QFread_write_sectionEsp_write) : !fir.ref> -!HOST: %[[C1:.*]] = arith.constant 1 : index -!HOST: %[[C2:.*]] = arith.constant 1 : index -!HOST: %[[C3:.*]] = arith.constant 4 : index -!HOST: %[[BOUNDS0:.*]] = omp.bounds lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) -!HOST: %[[MAP0:.*]] = omp.map_info var_ptr(%[[READ]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref> {name = "sp_read(2:5)"} -!HOST: %[[C4:.*]] = arith.constant 1 : index -!HOST: %[[C5:.*]] = arith.constant 1 : index -!HOST: %[[C6:.*]] = arith.constant 4 : index -!HOST: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : 
index) -!HOST: %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref>, !fir.ref>) { +!ALL-LABEL: func.func @_QPread_write_section( +!ALL: %[[ITER:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFread_write_sectionEi"} +!ALL: %[[READ:.*]] = fir.address_of(@_QFread_write_sectionEsp_read) : !fir.ref> +!ALL: %[[WRITE:.*]] = fir.address_of(@_QFread_write_sectionEsp_write) : !fir.ref> +!ALL: %[[C1:.*]] = arith.constant 1 : index +!ALL: %[[C2:.*]] = arith.constant 1 : index +!ALL: %[[C3:.*]] = arith.constant 4 : index +!ALL: %[[BOUNDS0:.*]] = omp.bounds lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) +!ALL: %[[MAP0:.*]] = omp.map_info var_ptr(%[[READ]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref> {name = "sp_read(2:5)"} +!ALL: %[[C4:.*]] = arith.constant 1 : index +!ALL: %[[C5:.*]] = arith.constant 1 : index +!ALL: %[[C6:.*]] = arith.constant 4 : index +!ALL: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index) +!ALL: %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} +!ALL: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ITER]] : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "i"} +!ALL: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref>, !fir.ref, index, index) { subroutine read_write_section() integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/) @@ -44,33 +29,22 @@ subroutine read_write_section() !$omp end target end subroutine read_write_section - module assumed_array_routines - contains -!DEVICE-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array_omp_outline_0( -!DEVICE-SAME: %[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.box>, %[[ARG2:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QMassumed_array_routinesPassumed_shape_array"} { -!DEVICE: %[[C0:.*]] = arith.constant 1 : index -!DEVICE: %[[C1:.*]] = arith.constant 4 : index -!DEVICE: %[[C2:.*]] = arith.constant 0 : index -!DEVICE: %[[C3:.*]]:3 = fir.box_dims %[[ARG1]], %[[C2]] : (!fir.box>, index) -> (index, index, index) -!DEVICE: %[[C4:.*]] = arith.constant 1 : index -!DEVICE: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C0]] : index) upper_bound(%[[C1]] : index) stride(%[[C3]]#2 : index) start_idx(%[[C4]] : index) {stride_in_bytes = true} -!DEVICE: %[[ARGADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box>) -> !fir.ref> -!DEVICE: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARGADDR]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!DEVICE: omp.target map_entries(%[[MAP]] : !fir.ref>) { +contains +!ALL-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array( +!ALL-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "arr_read_write"}) { +!ALL: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_shape_arrayEi"} +!ALL: %[[C0:.*]] = arith.constant 1 : index +!ALL: %[[C1:.*]] = arith.constant 0 : index +!ALL: %[[C2:.*]]:3 = 
fir.box_dims %arg0, %[[C1]] : (!fir.box>, index) -> (index, index, index) +!ALL: %[[C3:.*]] = arith.constant 1 : index +!ALL: %[[C4:.*]] = arith.constant 4 : index +!ALL: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true} +!ALL: %[[ADDROF:.*]] = fir.box_addr %arg0 : (!fir.box>) -> !fir.ref> +!ALL: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} +!ALL: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "i"} +!ALL: omp.target map_entries(%[[MAP]] -> %{{.*}}, %[[MAP2]] -> %{{.*}} : !fir.ref>, !fir.ref) { -!HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array( -!HOST-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "arr_read_write"}) { -!HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_shape_arrayEi"} -!HOST: %[[C0:.*]] = arith.constant 1 : index -!HOST: %[[C1:.*]] = arith.constant 0 : index -!HOST: %[[C2:.*]]:3 = fir.box_dims %arg0, %[[C1]] : (!fir.box>, index) -> (index, index, index) -!HOST: %[[C3:.*]] = arith.constant 1 : index -!HOST: %[[C4:.*]] = arith.constant 4 : index -!HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true} -!HOST: %[[ADDROF:.*]] = fir.box_addr %arg0 : (!fir.box>) -> !fir.ref> -!HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] : !fir.ref>) { subroutine assumed_shape_array(arr_read_write) integer, intent(inout) :: arr_read_write(:) @@ -81,25 +55,17 @@ subroutine assumed_shape_array(arr_read_write) !$omp end target end subroutine assumed_shape_array -!DEVICE-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array_omp_outline_0( -!DEVICE-SAME: %[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QMassumed_array_routinesPassumed_size_array"} { -!DEVICE: %[[C0:.*]] = arith.constant 1 : index -!DEVICE: %[[C1:.*]] = arith.constant 4 : index -!DEVICE: %[[C2:.*]] = arith.constant 1 : index -!DEVICE: %[[C3:.*]] = arith.constant 1 : index -!DEVICE: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C0]] : index) upper_bound(%[[C1]] : index) stride(%[[C3]] : index) start_idx(%[[C3]] : index) -!DEVICE: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!DEVICE: omp.target map_entries(%[[MAP]] : !fir.ref>) { +!ALL-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array( +!ALL-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "arr_read_write"}) { +!ALL: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"} +!ALL: %[[C0:.*]] = arith.constant 1 : index +!ALL: %[[C1:.*]] = arith.constant 1 : index +!ALL: %[[C2:.*]] = arith.constant 4 : index +!ALL: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index) +!ALL: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0]] : !fir.ref>, 
!fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} +!ALL: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "i"} +!ALL: omp.target map_entries(%[[MAP]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref, index) { -!HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array( -!HOST-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "arr_read_write"}) { -!HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"} -!HOST: %[[C0:.*]] = arith.constant 1 : index -!HOST: %[[C1:.*]] = arith.constant 1 : index -!HOST: %[[C2:.*]] = arith.constant 4 : index -!HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index) -!HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] : !fir.ref>) { subroutine assumed_size_array(arr_read_write) integer, intent(inout) :: arr_read_write(*) @@ -111,6 +77,7 @@ subroutine assumed_size_array(arr_read_write) end subroutine assumed_size_array end module assumed_array_routines +!DEVICE-NOT:func.func @_QPcall_assumed_shape_and_size_array() { !HOST-LABEL:func.func @_QPcall_assumed_shape_and_size_array() { !HOST:%{{.*}} = arith.constant 20 : index diff --git a/flang/test/Lower/OpenMP/FIR/location.f90 b/flang/test/Lower/OpenMP/FIR/location.f90 index 0e36e09b19e194..64837783767032 100644 --- a/flang/test/Lower/OpenMP/FIR/location.f90 +++ b/flang/test/Lower/OpenMP/FIR/location.f90 @@ -17,7 +17,7 @@ subroutine sub_parallel() !CHECK-LABEL: sub_target subroutine sub_target() print *, x -!CHECK: omp.target { +!CHECK: omp.target {{.*}} { !$omp target print *, x !CHECK: omp.terminator loc(#[[TAR_LOC:.*]]) diff --git a/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90 b/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90 deleted file mode 100644 index 14bf911b3e5410..00000000000000 --- a/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90 +++ /dev/null @@ -1,89 +0,0 @@ -!REQUIRES: amdgpu-registered-target - -!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s -!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s -!RUN: bbc -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s -!RUN: bbc -emit-fir -fopenmp -fopenmp-is-gpu -fopenmp-is-target-device %s -o - | FileCheck %s - -!CHECK: func.func @_QPtarget_function - -!CHECK: func.func @_QPwrite_index_omp_outline_0(%[[ARG0:.*]]: !fir.ref) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { -!CHECK-NEXT: %[[map_info0:.*]] = omp.map_info var_ptr(%[[ARG0]]{{.*}} -!CHECK-NEXT: omp.target map_entries(%[[map_info0]]{{.*}} { -!CHECK: %[[CONSTANT_VALUE_10:.*]] = arith.constant 10 : i32 -!CHECK: fir.store %[[CONSTANT_VALUE_10]] to %[[ARG0]] : !fir.ref -!CHECK: omp.terminator -!CHECK-NEXT: } -!CHECK-NEXT: return - -!CHECK: func.func @_QPwrite_index_omp_outline_1(%[[ARG1:.*]]: !fir.ref) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { -!CHECK-NEXT: %[[map_info1:.*]] = omp.map_info var_ptr(%[[ARG1]]{{.*}} 
-!CHECK-NEXT: omp.target map_entries(%[[map_info1]]{{.*}} { -!CHECK: %[[CONSTANT_VALUE_20:.*]] = arith.constant 20 : i32 -!CHECK: fir.store %[[CONSTANT_VALUE_20]] to %[[ARG1]] : !fir.ref -!CHECK: omp.terminator -!CHECK-NEXT: } -!CHECK-NEXT: return - - -SUBROUTINE WRITE_INDEX(INT_ARRAY) - INTEGER :: INT_ARRAY(*) - INTEGER :: NEW_LEN -!$omp target map(from:new_len) - NEW_LEN = 10 -!$omp end target -!$omp target map(from:new_len) - NEW_LEN = 20 -!$omp end target - do INDEX_ = 1, NEW_LEN - INT_ARRAY(INDEX_) = INDEX_ - end do -end subroutine WRITE_INDEX - -SUBROUTINE TARGET_FUNCTION() -!$omp declare target -END - -!CHECK: func.func @_QParray_bounds_omp_outline_0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QParray_bounds"} { -!CHECK: %[[C1:.*]] = arith.constant 1 : index -!CHECK: %[[C4:.*]] = arith.constant 4 : index -!CHECK: %[[C1_0:.*]] = arith.constant 1 : index -!CHECK: %[[C1_1:.*]] = arith.constant 1 : index -!CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C4]] : index) stride(%[[C1_1]] : index) start_idx(%[[C1_1]] : index) -!CHECK: %[[ENTRY:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "sp_write(2:5)"} -!CHECK: omp.target map_entries(%[[ENTRY]] : !fir.ref>) { -!CHECK: %c2_i32 = arith.constant 2 : i32 -!CHECK: %2 = fir.convert %c2_i32 : (i32) -> index -!CHECK: %c5_i32 = arith.constant 5 : i32 -!CHECK: %3 = fir.convert %c5_i32 : (i32) -> index -!CHECK: %c1_2 = arith.constant 1 : index -!CHECK: %4 = fir.convert %2 : (index) -> i32 -!CHECK: %5:2 = fir.do_loop %arg2 = %2 to %3 step %c1_2 iter_args(%arg3 = %4) -> (index, i32) { -!CHECK: fir.store %arg3 to %[[ARG0]] : !fir.ref -!CHECK: %6 = fir.load %[[ARG0]] : !fir.ref -!CHECK: %7 = fir.load %[[ARG0]] : !fir.ref -!CHECK: %8 = fir.convert %7 : (i32) -> i64 -!CHECK: %c1_i64 = arith.constant 1 : i64 -!CHECK: %9 = arith.subi %8, %c1_i64 : i64 -!CHECK: %10 = fir.coordinate_of %[[ARG1]], %9 : (!fir.ref>, i64) -> !fir.ref -!CHECK: fir.store %6 to %10 : !fir.ref -!CHECK: %11 = arith.addi %arg2, %c1_2 : index -!CHECK: %12 = fir.convert %c1_2 : (index) -> i32 -!CHECK: %13 = fir.load %[[ARG0]] : !fir.ref -!CHECK: %14 = arith.addi %13, %12 : i32 -!CHECK: fir.result %11, %14 : index, i32 -!CHECK: } -!CHECK: fir.store %5#1 to %[[ARG0]] : !fir.ref -!CHECK: omp.terminator -!CHECK: } -!CHECK:return -!CHECK:} - -SUBROUTINE ARRAY_BOUNDS() - INTEGER :: sp_write(10) = (/0,0,0,0,0,0,0,0,0,0/) -!$omp target map(tofrom:sp_write(2:5)) - do i = 2, 5 - sp_write(i) = i - end do -!$omp end target -end subroutine ARRAY_BOUNDS diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 95c57c9a224014..d5a8fb242de921 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -189,13 +189,14 @@ subroutine omp_target integer :: a(1024) !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { + !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref>, index) { + !CHECK: ^bb0(%[[ARG_0]]: !fir.ref>, %{{.*}}: index): !$omp target map(tofrom: a) !CHECK: %[[VAL_1:.*]] = 
arith.constant 10 : i32 !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64 !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i64 !CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_2]], %[[VAL_3]] : i64 - !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, i64) -> !fir.ref + !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[ARG_0]], %[[VAL_4]] : (!fir.ref>, i64) -> !fir.ref !CHECK: fir.store %[[VAL_1]] to %[[VAL_5]] : !fir.ref a(1) = 10 !CHECK: omp.terminator @@ -203,6 +204,72 @@ subroutine omp_target !CHECK: } end subroutine omp_target +!=============================================================================== +! Target implicit capture +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit() { +subroutine omp_target_implicit + !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_implicitEa"} + integer :: a(1024) + !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.array<1024xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "a"} + !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref>, index) { + !CHECK: ^bb0(%[[ARG_0]]: !fir.ref>, %{{.*}}: index): + !$omp target + !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[ARG_0]], %{{.*}} : (!fir.ref>, i64) -> !fir.ref + a(1) = 10 + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_implicit + +!=============================================================================== +! Target implicit capture nested +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit_nested() { +subroutine omp_target_implicit_nested + integer::a, b + !CHECK: omp.target map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { + !CHECK: ^bb0(%[[ARG0]]: !fir.ref, %[[ARG1]]: !fir.ref): + !$omp target + !CHECK: fir.store %{{.*}} to %[[ARG0]] : !fir.ref + a = 10 + !$omp parallel + !CHECK: fir.store %{{.*}} to %[[ARG1]] : !fir.ref + b = 20 + !CHECK: omp.terminator + !$omp end parallel + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_implicit_nested + +!=============================================================================== +! 
Target implicit capture with bounds +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit_bounds(%{{.*}}: !fir.ref {fir.bindc_name = "n"}) { +subroutine omp_target_implicit_bounds(n) + !CHECK: %[[VAL_1:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index + !CHECK: %[[VAL_2:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index + !CHECK: %[[VAL_3:.*]] = fir.alloca !fir.array, %[[VAL_1]] {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"} + integer :: n + integer :: a(n, 1024) + !CHECK: %[[VAL_4:.*]] = omp.map_info var_ptr(%[[VAL_3]] : !fir.ref>, !fir.array) map_clauses(implicit, tofrom) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "a"} + !CHECK: %[[VAL_5:.*]] = omp.map_info val(%[[VAL_1]] : index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""} + !CHECK: %[[VAL_6:.*]] = omp.map_info val(%[[VAL_2]] : index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""} + !CHECK: omp.target map_entries(%[[VAL_4]] -> %[[ARG_1:.*]], %[[VAL_5]] -> %[[ARG_2:.*]], %[[VAL_6]] -> %[[ARG_3:.*]] : !fir.ref>, index, index) { + !CHECK: ^bb0(%[[ARG_1]]: !fir.ref>, %[[ARG_2]]: index, %[[ARG_3]]: index): + !$omp target + !CHECK: %{{.*}} = fir.convert %[[ARG_1]] : (!fir.ref>) -> !fir.ref> + !CHECK: %{{.*}} = arith.muli %{{.*}}, %[[ARG_2]] : index + a(11,22) = 33 + !CHECK: omp.terminator + !$omp end target +!CHECK: } +end subroutine omp_target_implicit_bounds + !=============================================================================== ! Target `thread_limit` clause !=============================================================================== @@ -212,7 +279,8 @@ subroutine omp_target_thread_limit integer :: a !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a"} - !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] : !fir.ref) { + !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref) { + !CHECK: ^bb0(%[[ARG_0]]: !fir.ref): !$omp target map(tofrom: a) thread_limit(64) a = 10 !CHECK: omp.terminator @@ -274,23 +342,25 @@ subroutine omp_target_parallel_do !CHECK: %[[C0:.*]] = arith.constant 0 : index !CHECK: %[[SUB:.*]] = arith.subi %[[C1024]], %[[C1]] : index !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C0]] : index) upper_bound(%[[SUB]] : index) extent(%[[C1024]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) - !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { + !CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: %[[MAP2:.*]] = omp.map_info var_ptr(%[[VAL_1]] : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "i"} + !CHECK: omp.target map_entries(%[[MAP1]] -> %[[VAL_2:.*]], %[[MAP2]] -> %[[VAL_3:.*]], %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref, index) { + !CHECK: ^bb0(%[[VAL_2]]: !fir.ref>, %[[VAL_3]]: !fir.ref, %{{.*}}: index): !CHECK-NEXT: omp.parallel !$omp target parallel do map(tofrom: a) - !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} - !CHECK: %[[VAL_3:.*]] = arith.constant 1 : 
i32 - !CHECK: %[[VAL_4:.*]] = arith.constant 1024 : i32 + !CHECK: %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { - !CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 - !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64 - !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64 - !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64 - !CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_11]] : (!fir.ref>, i64) -> !fir.ref - !CHECK: fir.store %[[VAL_7]] to %[[VAL_12]] : !fir.ref + !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32 + !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 + !CHECK: omp.wsloop for (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { + !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref + !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 + !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref + !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> i64 + !CHECK: %[[VAL_12:.*]] = arith.constant 1 : i64 + !CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_11]], %[[VAL_12]] : i64 + !CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_13]] : (!fir.ref>, i64) -> !fir.ref + !CHECK: fir.store %[[VAL_9]] to %[[VAL_14]] : !fir.ref do i = 1, 1024 a(i) = 10 end do @@ -301,4 +371,4 @@ subroutine omp_target_parallel_do !CHECK: omp.terminator !CHECK: } !$omp end target parallel do -end subroutine omp_target_parallel_do + end subroutine omp_target_parallel_do diff --git a/flang/test/Lower/OpenMP/FIR/task.f90 b/flang/test/Lower/OpenMP/FIR/task.f90 index d7419bd1100e69..99a9e3a6b1e35a 100644 --- a/flang/test/Lower/OpenMP/FIR/task.f90 +++ b/flang/test/Lower/OpenMP/FIR/task.f90 @@ -40,34 +40,6 @@ subroutine omp_task_final(bar) !$omp end task end subroutine omp_task_final -!=============================================================================== -! `untied` clause -!=============================================================================== - -!CHECK-LABEL: func @_QPomp_task_untied() { -subroutine omp_task_untied() - !CHECK: omp.task untied { - !$omp task untied - !CHECK: fir.call @_QPfoo() {{.*}}: () -> () - call foo() - !CHECK: omp.terminator - !$omp end task -end subroutine omp_task_untied - -!=============================================================================== -! `mergeable` clause -!=============================================================================== - -!CHECK-LABEL: func @_QPomp_task_mergeable() { -subroutine omp_task_mergeable() - !CHECK: omp.task mergeable { - !$omp task mergeable - !CHECK: fir.call @_QPfoo() {{.*}}: () -> () - call foo() - !CHECK: omp.terminator - !$omp end task -end subroutine omp_task_mergeable - !=============================================================================== ! 
`priority` clause !=============================================================================== @@ -245,8 +217,8 @@ subroutine task_multiple_clauses() integer :: x, y, z logical :: buzz - !CHECK: omp.task if(%{{.+}}) final(%{{.+}}) untied mergeable priority(%{{.+}}) allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref) { - !$omp task if(buzz) final(buzz) untied mergeable priority(z) allocate(omp_high_bw_mem_alloc: x) private(x) firstprivate(y) + !CHECK: omp.task if(%{{.+}}) final(%{{.+}}) priority(%{{.+}}) allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref) { + !$omp task if(buzz) final(buzz) priority(z) allocate(omp_high_bw_mem_alloc: x) private(x) firstprivate(y) !CHECK: %[[x_priv:.+]] = fir.alloca i32 !CHECK: %[[y_priv:.+]] = fir.alloca i32 diff --git a/flang/test/Lower/OpenMP/Todo/task_mergeable.f90 b/flang/test/Lower/OpenMP/Todo/task_mergeable.f90 new file mode 100644 index 00000000000000..13145d92ccf902 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/task_mergeable.f90 @@ -0,0 +1,13 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +!=============================================================================== +! `mergeable` clause +!=============================================================================== + +! CHECK: not yet implemented: OpenMP Block construct clause +subroutine omp_task_mergeable() + !$omp task mergeable + call foo() + !$omp end task +end subroutine omp_task_mergeable diff --git a/flang/test/Lower/OpenMP/Todo/task_untied.f90 b/flang/test/Lower/OpenMP/Todo/task_untied.f90 new file mode 100644 index 00000000000000..19621c7aac16d6 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/task_untied.f90 @@ -0,0 +1,13 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +!=============================================================================== +! `untied` clause +!=============================================================================== + +! 
CHECK: not yet implemented: OpenMP Block construct clause +subroutine omp_task_untied() + !$omp task untied + call foo() + !$omp end task +end subroutine omp_task_untied diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90 index e65f17f74f5909..d0c584bec6044a 100644 --- a/flang/test/Lower/OpenMP/array-bounds.f90 +++ b/flang/test/Lower/OpenMP/array-bounds.f90 @@ -22,7 +22,7 @@ !HOST: %[[C6:.*]] = arith.constant 4 : index !HOST: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index) !HOST: %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE_DECL]]#1 : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref>, !fir.ref>) { +!HOST: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref>, !fir.ref, index, index) { subroutine read_write_section() integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/) @@ -50,7 +50,7 @@ module assumed_array_routines !HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true} !HOST: %[[ADDROF:.*]] = fir.box_addr %[[ARG0_DECL]]#1 : (!fir.box>) -> !fir.ref> !HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] : !fir.ref>) { +!HOST: omp.target map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref) { subroutine assumed_shape_array(arr_read_write) integer, intent(inout) :: arr_read_write(:) @@ -73,7 +73,7 @@ end subroutine assumed_shape_array !HOST: %[[C2:.*]] = arith.constant 4 : index !HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index) !HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0_DECL]]#1 : !fir.ref>, !fir.array) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] : !fir.ref>) { +!HOST: omp.target map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref, index) { subroutine assumed_size_array(arr_read_write) integer, intent(inout) :: arr_read_write(*) diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90 index 8219be5ad1e40c..17cd0d44c01b4b 100644 --- a/flang/test/Lower/OpenMP/function-filtering-2.f90 +++ b/flang/test/Lower/OpenMP/function-filtering-2.f90 @@ -26,9 +26,7 @@ subroutine no_declaretarget() end subroutine no_declaretarget ! MLIR-HOST: func.func @{{.*}}main( -! MLIR-HOST-NOT: func.func @{{.*}}main_omp_outline{{.*}}() ! MLIR-DEVICE-NOT: func.func @{{.*}}main( -! MLIR-DEVICE: func.func @{{.*}}main_omp_outline{{.*}}() attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QQmain"} ! MLIR-ALL: return ! LLVM-HOST: define {{.*}} @{{.*}}main{{.*}}( diff --git a/flang/test/Lower/OpenMP/function-filtering.f90 b/flang/test/Lower/OpenMP/function-filtering.f90 index 3de14aa4709fc4..e550348e50692c 100644 --- a/flang/test/Lower/OpenMP/function-filtering.f90 +++ b/flang/test/Lower/OpenMP/function-filtering.f90 @@ -34,14 +34,9 @@ end function host_fn ! 
MLIR-HOST: func.func @{{.*}}target_subr( ! MLIR-HOST: return -! MLIR-HOST-NOT: func.func @{{.*}}target_subr_omp_outline_0( -! MLIR-DEVICE-NOT: func.func @{{.*}}target_subr( -! MLIR-DEVICE: func.func @{{.*}}target_subr_omp_outline_0( ! MLIR-DEVICE: return -! LLVM-ALL-NOT: define {{.*}} @{{.*}}target_subr_omp_outline_0{{.*}}( ! LLVM-HOST: define {{.*}} @{{.*}}target_subr{{.*}}( -! LLVM-DEVICE-NOT: {{.*}} @{{.*}}target_subr{{.*}}( ! LLVM-ALL: define {{.*}} @__omp_offloading_{{.*}}_{{.*}}_target_subr__{{.*}}( subroutine target_subr(x) integer, intent(out) :: x diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90 index c87bf038e96721..1e01a4828dd9e1 100644 --- a/flang/test/Lower/OpenMP/location.f90 +++ b/flang/test/Lower/OpenMP/location.f90 @@ -17,7 +17,7 @@ subroutine sub_parallel() !CHECK-LABEL: sub_target subroutine sub_target() print *, x -!CHECK: omp.target { +!CHECK: omp.target {{.*}} { !$omp target print *, x !CHECK: omp.terminator loc(#[[TAR_LOC:.*]]) diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 5fc77f206250ef..3e46d315f8cc47 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -142,13 +142,13 @@ subroutine private_clause_derived_type() !FIRDialect: omp.parallel { !FIRDialect-DAG: %[[X_PVT:.*]] = fir.alloca !fir.box> {bindc_name = "x", pinned, uniq_name = "{{.*}}Ex"} -!FIRDialect-DAG: %[[X_PVT_DECL:.*]]:2 = hlfir.declare %[[X_PVT]] {uniq_name = "{{.*}}Ex"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!FIRDialect-DAG: %[[X_PVT_DECL:.*]]:2 = hlfir.declare %[[X_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Ex"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !FIRDialect-DAG: %[[X2_PVT:.*]] = fir.alloca !fir.box>> {bindc_name = "x2", pinned, uniq_name = "{{.*}}Ex2"} -!FIRDialect-DAG: %[[X2_PVT_DECL:.*]]:2 = hlfir.declare %[[X2_PVT]] {uniq_name = "{{.*}}Ex2"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!FIRDialect-DAG: %[[X2_PVT_DECL:.*]]:2 = hlfir.declare %[[X2_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Ex2"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !FIRDialect-DAG: %[[X3_PVT:.*]] = fir.alloca !fir.box> {bindc_name = "x3", pinned, uniq_name = "{{.*}}Ex3"} -!FIRDialect-DAG: %[[X3_PVT_DECL:.*]]:2 = hlfir.declare %[[X3_PVT]] {uniq_name = "{{.*}}Ex3"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!FIRDialect-DAG: %[[X3_PVT_DECL:.*]]:2 = hlfir.declare %[[X3_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Ex3"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !FIRDialect-DAG: %[[X4_PVT:.*]] = fir.alloca !fir.box>> {bindc_name = "x4", pinned, uniq_name = "{{.*}}Ex4"} -!FIRDialect-DAG: %[[X4_PVT_DECL:.*]]:2 = hlfir.declare %[[X4_PVT]] {uniq_name = "{{.*}}Ex4"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!FIRDialect-DAG: %[[X4_PVT_DECL:.*]]:2 = hlfir.declare %[[X4_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Ex4"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !FIRDialect-DAG: %[[TMP58:.*]] = fir.load %[[X4_DECL]]#1 : !fir.ref>>> !FIRDialect-DAG: %[[TMP97:.*]] = fir.load %[[X4_DECL]]#1 : !fir.ref>>> @@ -190,7 +190,7 @@ subroutine private_clause_allocatable() !FIRDialect-DAG: fir.store %{{.*}} to %[[X5_PVT]] : !fir.ref>> !FIRDialect-DAG: } -!FIRDialect-DAG: %[[X5_PVT_DECL:.*]]:2 = hlfir.declare %[[X5_PVT]] {uniq_name = "_QFprivate_clause_real_call_allocatableEx5"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!FIRDialect-DAG: %[[X5_PVT_DECL:.*]]:2 = hlfir.declare 
%[[X5_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFprivate_clause_real_call_allocatableEx5"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !FIRDialect-DAG: fir.call @_QFprivate_clause_real_call_allocatablePhelper_private_clause_real_call_allocatable(%[[X5_PVT_DECL]]#0) fastmath : (!fir.ref>>) -> () !FIRDialect-DAG: %{{.*}} = fir.load %[[X5_PVT_DECL]]#1 : !fir.ref>> @@ -222,7 +222,7 @@ subroutine helper_private_clause_real_call_allocatable(x6) !FIRDialect: %[[P_DECL:.*]]:2 = hlfir.declare %[[P]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFincrement_list_itemsEp"} : (!fir.ref>>}>>>>) -> (!fir.ref>>}>>>>, !fir.ref>>}>>>>) !FIRDialect: omp.parallel { !FIRDialect: %[[P_PVT:.*]] = fir.alloca !fir.box>>}>>> {bindc_name = "p", pinned, uniq_name = "_QFincrement_list_itemsEp"} -!FIRDialect: %[[P_PVT_DECL:.*]]:2 = hlfir.declare %[[P_PVT]] {uniq_name = "_QFincrement_list_itemsEp"} : (!fir.ref>>}>>>>) -> (!fir.ref>>}>>>>, !fir.ref>>}>>>>) +!FIRDialect: %[[P_PVT_DECL:.*]]:2 = hlfir.declare %[[P_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFincrement_list_itemsEp"} : (!fir.ref>>}>>>>) -> (!fir.ref>>}>>>>, !fir.ref>>}>>>>) !FIRDialect: omp.single { !FIRDialect: omp.terminator @@ -262,9 +262,9 @@ end subroutine increment_list_items !FIRDialect-DAG: %[[Z2_DECL:.*]]:2 = hlfir.declare %[[Z2]](%12) {fortran_attrs = #fir.var_attrs, uniq_name = "_QFparallel_pointerEz2"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) !FIRDialect: omp.parallel { !FIRDialect-DAG: %[[Y1_PVT:.*]] = fir.alloca !fir.box> {bindc_name = "y1", pinned, uniq_name = "_QFparallel_pointerEy1"} -!FIRDialect-DAG: %[[Y1_PVT_DECL:.*]]:2 = hlfir.declare %[[Y1_PVT]] {uniq_name = "_QFparallel_pointerEy1"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +!FIRDialect-DAG: %[[Y1_PVT_DECL:.*]]:2 = hlfir.declare %[[Y1_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFparallel_pointerEy1"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !FIRDialect-DAG: %[[Y2_PVT:.*]] = fir.alloca !fir.box>> {bindc_name = "y2", pinned, uniq_name = "_QFparallel_pointerEy2"} -!FIRDialect-DAG: %[[Y2_PVT_DECL:.*]]:2 = hlfir.declare %[[Y2_PVT]] {uniq_name = "_QFparallel_pointerEy2"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!FIRDialect-DAG: %[[Y2_PVT_DECL:.*]]:2 = hlfir.declare %[[Y2_PVT]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFparallel_pointerEy2"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !FIRDialect-DAG: %[[PP18:.*]] = fir.embox %[[Z1_DECL]]#1 : (!fir.ref) -> !fir.box> !FIRDialect: fir.store %[[PP18]] to %[[Y1_PVT_DECL]]#1 : !fir.ref>> !FIRDialect-DAG: %[[PP19:.*]] = fir.shape %c10 : (index) -> !fir.shape<1> @@ -298,7 +298,7 @@ subroutine simple_loop_1 ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! FIRDialect: %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {uniq_name = "_QFsimple_loop_1Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + ! FIRDialect: %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsimple_loop_1Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 @@ -336,7 +336,7 @@ subroutine simple_loop_2 ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! 
FIRDialect: %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {uniq_name = "{{.*}}Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + ! FIRDialect: %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 @@ -373,7 +373,7 @@ subroutine simple_loop_3 ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {uniq_name = "{{.*}}Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + ! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}Er"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 @@ -405,7 +405,7 @@ subroutine simd_loop_1 ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {uniq_name = "{{.*}}r"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + ! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}r"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32 diff --git a/flang/test/Lower/OpenMP/private-commonblock.f90 b/flang/test/Lower/OpenMP/private-commonblock.f90 index 615ddb21129c91..ee580594f7c3fc 100644 --- a/flang/test/Lower/OpenMP/private-commonblock.f90 +++ b/flang/test/Lower/OpenMP/private-commonblock.f90 @@ -100,7 +100,7 @@ subroutine private_clause_commonblock() !CHECK: fir.call @_QPsub4(%[[C_REF]], %[[A_DECL]]#1) fastmath : (!fir.ref>, !fir.ref) -> () !CHECK: omp.parallel { !CHECK: %[[C_PVT_REF:.*]] = fir.alloca !fir.box>> {bindc_name = "c", pinned, uniq_name = "_QFprivate_clause_commonblock_pointerEc"} -!CHECK: %[[C_PVT_DECL:.*]]:2 = hlfir.declare %[[C_PVT_REF]] {uniq_name = "_QFprivate_clause_commonblock_pointerEc"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +!CHECK: %[[C_PVT_DECL:.*]]:2 = hlfir.declare %[[C_PVT_REF]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFprivate_clause_commonblock_pointerEc"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFprivate_clause_commonblock_pointerEa"} !CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFprivate_clause_commonblock_pointerEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[C_PVT_BOX:.*]] = fir.load %[[C_PVT_DECL]]#0 : !fir.ref>>> diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 754e13d1a36b50..86f456b847df90 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -187,23 +187,112 @@ end subroutine omp_target_data_mt !CHECK-LABEL: func.func @_QPomp_target() { subroutine omp_target - !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_targetEa"} - !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[VAL_0]](%{{.*}}) {uniq_name = "_QFomp_targetEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + !CHECK: %[[VAL_1:.*]]:2 = hlfir.declare 
%{{.*}}(%{{.*}}) {uniq_name = "_QFomp_targetEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) integer :: a(1024) !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) - !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0_DECL]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { + !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_1]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %[[ARG_1:.*]] : !fir.ref>, index) { + !CHECK: ^bb0(%[[ARG_0]]: !fir.ref>, %[[ARG_1]]: index): !$omp target map(tofrom: a) - !CHECK: %[[C10:.*]] = arith.constant 10 : i32 - !CHECK: %[[C1:.*]] = arith.constant 1 : index - !CHECK: %[[A_1:.*]] = hlfir.designate %[[VAL_0_DECL]]#0 (%[[C1]]) : (!fir.ref>, index) -> !fir.ref - !CHECK: hlfir.assign %[[C10]] to %[[A_1]] : i32, !fir.ref + !CHECK: %[[VAL_2:.*]] = fir.shape %[[ARG_1]] : (index) -> !fir.shape<1> + !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG_0]](%[[VAL_2]]) {uniq_name = "_QFomp_targetEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + !CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 + !CHECK: %[[VAL_5:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_6:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_5]]) : (!fir.ref>, index) -> !fir.ref + !CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_6]] : i32, !fir.ref a(1) = 10 - !CHECK: omp.terminator + !CHECK: omp.terminator !$omp end target !CHECK: } end subroutine omp_target +!=============================================================================== +! 
Target implicit capture +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit() { +subroutine omp_target_implicit + !CHECK: %[[VAL_0:.*]] = arith.constant 1024 : index + !CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_implicitEa"} + !CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1> + !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_2]]) {uniq_name = "_QFomp_target_implicitEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + integer :: a(1024) + !CHECK: %[[VAL_4:.*]] = omp.map_info var_ptr(%[[VAL_3]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "a"} + !CHECK: %[[VAL_5:.*]] = omp.map_info val(%[[VAL_0]] : index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""} + !CHECK: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref>, index) { + !CHECK: ^bb0(%[[VAL_6]]: !fir.ref>, %[[VAL_7]]: index): + !$omp target + !CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> + !CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_8]]) {uniq_name = "_QFomp_target_implicitEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + !CHECK: %[[VAL_10:.*]] = arith.constant 10 : i32 + !CHECK: %[[VAL_11:.*]] = arith.constant 1 : index + !CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_11]]) : (!fir.ref>, index) -> !fir.ref + !CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_12]] : i32, !fir.ref + a(1) = 10 + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_implicit + +!=============================================================================== +! Target implicit capture nested +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit_nested() { +subroutine omp_target_implicit_nested + integer::a, b + !CHECK: omp.target map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { + !CHECK: ^bb0(%[[ARG0]]: !fir.ref, %[[ARG1]]: !fir.ref): + !$omp target + !CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_target_implicit_nestedEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) + !CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_target_implicit_nestedEb"} : (!fir.ref) -> (!fir.ref, !fir.ref) + !CHECK: %[[VAL_10:.*]] = arith.constant 10 : i32 + !CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_8]]#0 : i32, !fir.ref + a = 10 + !CHECK: omp.parallel + !$omp parallel + !CHECK: %[[VAL_11:.*]] = arith.constant 20 : i32 + !CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_9]]#0 : i32, !fir.ref + b = 20 + !CHECK: omp.terminator + !$omp end parallel + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_implicit_nested + +!=============================================================================== +! 
Target implicit capture with bounds +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_implicit_bounds(%{{.*}}: !fir.ref {fir.bindc_name = "n"}) { +subroutine omp_target_implicit_bounds(n) + !CHECK: %[[VAL_1:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index + !CHECK: %[[VAL_2:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index + !CHECK: %[[VAL_3:.*]] = fir.alloca !fir.array, %[[VAL_1]] {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"} + !CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2> + !CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_4]]) {uniq_name = "_QFomp_target_implicit_boundsEa"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) + integer :: n + integer :: a(n, 1024) + !CHECK: %[[VAL_6:.*]] = omp.map_info var_ptr(%[[VAL_5]]#1 : !fir.ref>, !fir.array) map_clauses(implicit, tofrom) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "a"} + !CHECK: %[[VAL_7:.*]] = omp.map_info val(%[[VAL_1]] : index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""} + !CHECK: %[[VAL_8:.*]] = omp.map_info val(%[[VAL_2]] : index) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""} + !CHECK: omp.target map_entries(%[[VAL_6]] -> %[[ARG_1:.*]], %[[VAL_7]] -> %[[ARG_2:.*]], %[[VAL_8]] -> %[[ARG_3:.*]] : !fir.ref>, index, index) { + !CHECK: ^bb0(%[[ARG_1]]: !fir.ref>, %[[ARG_2]]: index, %[[ARG_3]]: index): + !$omp target + !CHECK: %[[VAL_9:.*]] = fir.shape %[[ARG_2]], %[[ARG_3]] : (index, index) -> !fir.shape<2> + !CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG_1]](%[[VAL_9]]) {uniq_name = "_QFomp_target_implicit_boundsEa"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) + !CHECK: %[[VAL_11:.*]] = arith.constant 33 : i32 + !CHECK: %[[VAL_12:.*]] = arith.constant 11 : index + !CHECK: %[[VAL_13:.*]] = arith.constant 22 : index + !CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_12]], %[[VAL_13]]) : (!fir.box>, index, index) -> !fir.ref + !CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_14]] : i32, !fir.ref + a(11, 22) = 33 + !CHECK: omp.terminator + !$omp end target +!CHECK: } +end subroutine omp_target_implicit_bounds + !=============================================================================== ! 
Target `thread_limit` clause !=============================================================================== @@ -213,7 +302,8 @@ subroutine omp_target_thread_limit integer :: a !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a"} - !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] : !fir.ref) { + !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) { + !CHECK: ^bb0(%{{.*}}: !fir.ref): !$omp target map(tofrom: a) thread_limit(64) a = 10 !CHECK: omp.terminator @@ -254,7 +344,7 @@ subroutine omp_target_device_addr !CHECK: omp.target_data map_entries(%[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0_DECL]]#1 : !fir.ref>>) { !$omp target data map(tofrom: a) use_device_addr(a) !CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref>>): - !CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + !CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[A_BOX:.*]] = fir.load %[[VAL_1_DECL]]#0 : !fir.ref>> !CHECK: %[[A_ADDR:.*]] = fir.box_addr %[[A_BOX]] : (!fir.box>) -> !fir.ptr @@ -306,8 +396,10 @@ subroutine omp_target_parallel_do !CHECK: %[[SUB:.*]] = arith.subi %[[C1024]], %[[C1]] : index !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C0]] : index) upper_bound(%[[SUB]] : index) extent(%[[C1024]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0_DECL]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { - !CHECK-NEXT: omp.parallel + !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref, index) { + !CHECK: ^bb0(%[[ARG_0]]: !fir.ref>, %{{.*}}: !fir.ref, %{{.*}}: index): + !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[ARG_0]](%{{.*}}) {uniq_name = "_QFomp_target_parallel_doEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + !CHECK: omp.parallel !$omp target parallel do map(tofrom: a) !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -329,3 +421,31 @@ subroutine omp_target_parallel_do !CHECK: } !$omp end target parallel do end subroutine omp_target_parallel_do + +!=============================================================================== +! 
Target with unstructured code +!=============================================================================== + +!CHECK-LABEL: func.func @_QPtarget_unstructured() { +subroutine target_unstructured + !CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtarget_unstructuredEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer :: i = 1 + !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtarget_unstructuredEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer :: j = 11 + !CHECK: %[[VAL_4:.*]] = omp.map_info var_ptr(%[[VAL_1]]#1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "i"} + !CHECK: %[[VAL_5:.*]] = omp.map_info var_ptr(%[[VAL_3]]#1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = "j"} + !CHECK: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref, !fir.ref) { + !CHECK: ^bb0(%[[VAL_6]]: !fir.ref, %[[VAL_7]]: !fir.ref): + !$omp target + !CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFtarget_unstructuredEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + !CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFtarget_unstructuredEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + !CHECK: ^bb1: + do while (i <= j) + !CHECK: ^bb2: + i = i + 1 + end do + !CHECK: ^bb3: + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine target_unstructured diff --git a/flang/test/Lower/OpenMP/task.f90 b/flang/test/Lower/OpenMP/task.f90 index 99b1740ca75a81..7d7a79af3185f5 100644 --- a/flang/test/Lower/OpenMP/task.f90 +++ b/flang/test/Lower/OpenMP/task.f90 @@ -40,34 +40,6 @@ subroutine omp_task_final(bar) !$omp end task end subroutine omp_task_final -!=============================================================================== -! `untied` clause -!=============================================================================== - -!CHECK-LABEL: func @_QPomp_task_untied() { -subroutine omp_task_untied() - !CHECK: omp.task untied { - !$omp task untied - !CHECK: fir.call @_QPfoo() {{.*}}: () -> () - call foo() - !CHECK: omp.terminator - !$omp end task -end subroutine omp_task_untied - -!=============================================================================== -! `mergeable` clause -!=============================================================================== - -!CHECK-LABEL: func @_QPomp_task_mergeable() { -subroutine omp_task_mergeable() - !CHECK: omp.task mergeable { - !$omp task mergeable - !CHECK: fir.call @_QPfoo() {{.*}}: () -> () - call foo() - !CHECK: omp.terminator - !$omp end task -end subroutine omp_task_mergeable - !=============================================================================== ! 
`priority` clause !=============================================================================== @@ -253,8 +225,8 @@ subroutine task_multiple_clauses() integer :: x, y, z logical :: buzz - !CHECK: omp.task if(%{{.+}}) final(%{{.+}}) untied mergeable priority(%{{.+}}) allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref) { - !$omp task if(buzz) final(buzz) untied mergeable priority(z) allocate(omp_high_bw_mem_alloc: x) private(x) firstprivate(y) + !CHECK: omp.task if(%{{.+}}) final(%{{.+}}) priority(%{{.+}}) allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref) { + !$omp task if(buzz) final(buzz) priority(z) allocate(omp_high_bw_mem_alloc: x) private(x) firstprivate(y) !CHECK: %[[X_PRIV_ALLOCA:.+]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFtask_multiple_clausesEx"} !CHECK: %[[X_PRIV:.+]]:2 = hlfir.declare %[[X_PRIV_ALLOCA]] {uniq_name = "_QFtask_multiple_clausesEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/libc/src/string/memory_utils/generic/aligned_access.h b/libc/src/string/memory_utils/generic/aligned_access.h index 4238167e54d644..65bc63f6cbe557 100644 --- a/libc/src/string/memory_utils/generic/aligned_access.h +++ b/libc/src/string/memory_utils/generic/aligned_access.h @@ -28,7 +28,7 @@ namespace LIBC_NAMESPACE { return load32_aligned(ptr, offset); else if (alignment == 2) return load32_aligned(ptr, offset); - else + else // 1, 3 return load32_aligned(ptr, offset); } @@ -38,9 +38,11 @@ namespace LIBC_NAMESPACE { return load64_aligned(ptr, offset); else if (alignment == 4) return load64_aligned(ptr, offset); + else if (alignment == 6) + return load64_aligned(ptr, offset); else if (alignment == 2) return load64_aligned(ptr, offset); - else + else // 1, 3, 5, 7 return load64_aligned( ptr, offset); } diff --git a/llvm/docs/CommandGuide/dsymutil.rst b/llvm/docs/CommandGuide/dsymutil.rst index df621a429bb5c3..af9d7f16b36196 100644 --- a/llvm/docs/CommandGuide/dsymutil.rst +++ b/llvm/docs/CommandGuide/dsymutil.rst @@ -34,7 +34,7 @@ OPTIONS .. option:: --build-variant-suffix - Specify the build variant suffix used to build the executabe file. + Specify the build variant suffix used to build the executable file. There can be multiple variants for the binary of a product, each built slightly differently. The most common build variants are 'debug' and 'profile'. Setting the DYLD_IMAGE_SUFFIX environment variable will diff --git a/llvm/docs/CommandGuide/llvm-remarkutil.rst b/llvm/docs/CommandGuide/llvm-remarkutil.rst index b3ce45f6c7e840..dcb2c50c0227c4 100644 --- a/llvm/docs/CommandGuide/llvm-remarkutil.rst +++ b/llvm/docs/CommandGuide/llvm-remarkutil.rst @@ -123,7 +123,7 @@ Summary ^^^^^^^ :program:`llvm-remarkutil count` counts `remarks ` based on specified properties. -By default the tool counts remarks based on how many occour in a source file or function or total for the generated remark file. +By default the tool counts remarks based on how many occur in a source file or function or total for the generated remark file. The tool also supports collecting count based on specific remark arguments. The specified arguments should have an integer value to be able to report a count. The tool contains utilities to filter the remark count based on remark name, pass name, argument value and remark type. @@ -149,7 +149,7 @@ OPTIONS * ``Total``: Report a count for the provided remark file. .. option:: --args[=arguments] - If `count-by` is set to `arg` this flag can be used to collect from specified remark arguments represented as a comma seperated string. 
+ If `count-by` is set to `arg` this flag can be used to collect from specified remark arguments represented as a comma separated string. The arguments must have a numeral value to be able to count remarks by .. option:: --rargs[=arguments] diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 30eb1bfe4b2d46..2743fa0051340c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6138,7 +6138,7 @@ The current supported opcode vocabulary is limited: instruction. Because ``DW_OP_LLVM_entry_value`` is defined in terms of registers, it is - usually used in MIR, but it is also allowed in LLVM IR when targetting a + usually used in MIR, but it is also allowed in LLVM IR when targeting a :ref:`swiftasync ` argument. The operation is introduced by: - ``LiveDebugValues`` pass, which applies it to function parameters that diff --git a/llvm/docs/LoopTerminology.rst b/llvm/docs/LoopTerminology.rst index 995d3e10a0ae35..f15f9e682d7e51 100644 --- a/llvm/docs/LoopTerminology.rst +++ b/llvm/docs/LoopTerminology.rst @@ -158,7 +158,7 @@ a dominating header. * The `FixIrreducible `_ pass can transform irreducible control flow into loops by inserting - new loop headers. It is not inlcuded in any default optimization pass + new loop headers. It is not included in any default optimization pass pipeline, but is required for some back-end targets. diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 5bc71bea77cdfc..551a23c0570e96 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -3561,8 +3561,8 @@ Important Public Members of the ``Module`` class * | ``Module::global_iterator`` - Typedef for global variable list iterator | ``Module::const_global_iterator`` - Typedef for const_iterator. | ``Module::insertGlobalVariable()`` - Inserts a global variable to the list. - | ``Module::removeGlobalVariable()`` - Removes a global variable frome the list. - | ``Module::eraseGlobalVariable()`` - Removes a global variable frome the list and deletes it. + | ``Module::removeGlobalVariable()`` - Removes a global variable from the list. + | ``Module::eraseGlobalVariable()`` - Removes a global variable from the list and deletes it. | ``global_begin()``, ``global_end()``, ``global_size()``, ``global_empty()`` These are forwarding methods that make it easy to access the contents of a diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index f9b5d0c843907e..152f781ffa9b30 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -90,7 +90,7 @@ namespace llvm { /// Check whether a string looks like an old loop attachment tag. inline bool mayBeOldLoopAttachmentTag(StringRef Name) { - return Name.startswith("llvm.vectorizer."); + return Name.starts_with("llvm.vectorizer."); } /// Upgrade the loop attachment metadata node. 
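The `startswith`/`endswith` to `starts_with`/`ends_with` renames that recur throughout the hunks above and below are purely mechanical: `llvm::StringRef` gained the `std::string_view`-compatible spellings and the camel-case originals were deprecated, so every call site is updated with no behavioral change. A minimal sketch of the shape of the change (the helper and section names here are illustrative, not part of the patch):

```cpp
#include "llvm/ADT/StringRef.h"

// Illustrative helper: only the StringRef member spelling changes,
// the semantics are identical.
static bool isDwoDebugSection(llvm::StringRef Name) {
  // Before: Name.startswith(".debug") && Name.endswith(".dwo")
  return Name.starts_with(".debug") && Name.ends_with(".dwo");
}
```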
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 2458c1cb9f8ec1..ff14da4c1e73e9 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7880,22 +7880,22 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, } break; case Instruction::FPTrunc: - CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL); break; case Instruction::FPExt: - CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL); break; case Instruction::FPToUI: - CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL); break; case Instruction::FPToSI: - CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL); break; case Instruction::UIToFP: - CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL); break; case Instruction::SIToFP: - CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL); break; default: break; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 74298cc4a1fc82..144be24be6450e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2410,7 +2410,8 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->emitAddrsig(); for (const GlobalValue &GV : M.global_values()) { if (!GV.use_empty() && !GV.isThreadLocal() && - !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && + !GV.hasDLLImportStorageClass() && + !GV.getName().starts_with("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->emitAddrsigSym(getSymbol(&GV)); } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 06dcf0a4d463b4..3a9d5fa3b936e0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -142,7 +142,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { // If this is a Unix-style path, just use it as is. Don't try to canonicalize // it textually because one of the path components could be a symlink. - if (Dir.startswith("/") || Filename.startswith("/")) { + if (Dir.starts_with("/") || Filename.starts_with("/")) { if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix)) return Filename; Filepath = std::string(Dir); @@ -910,10 +910,10 @@ static std::string flattenCommandLine(ArrayRef Args, i++; // Skip this argument and next one. continue; } - if (Arg.startswith("-object-file-name") || Arg == MainFilename) + if (Arg.starts_with("-object-file-name") || Arg == MainFilename) continue; // Skip fmessage-length for reproduciability. - if (Arg.startswith("-fmessage-length")) + if (Arg.starts_with("-fmessage-length")) continue; if (PrintedOneArg) OS << " "; @@ -2583,7 +2583,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // Virtual function pointer member. 
if ((Member->getFlags() & DINode::FlagArtificial) && - Member->getName().startswith("_vptr$")) { + Member->getName().starts_with("_vptr$")) { VFPtrRecord VFPR(getTypeIndex(Member->getBaseType())); ContinuationBuilder.writeMemberType(VFPR); MemberCount++; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b8cd0322f17be5..3cae70fecf3c93 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -447,7 +447,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) DwarfDebug::~DwarfDebug() = default; static bool isObjCClass(StringRef Name) { - return Name.startswith("+") || Name.startswith("-"); + return Name.starts_with("+") || Name.starts_with("-"); } static bool hasObjCCategory(StringRef Name) { diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 5d813b72c0b751..cd18703b359e1f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -75,7 +75,7 @@ static bool isPossibleIndirectCallTarget(const Function *F) { } MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) { - if (Sym->getName().startswith("__imp_")) + if (Sym->getName().starts_with("__imp_")) return nullptr; return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 82751a442dbc3b..ad5a4506efbd82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" @@ -324,7 +325,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); - LLVMC = cast(ConstantExpr::getFPTrunc(LLVMC, SType)); + LLVMC = cast(ConstantFoldCastOperand( + Instruction::FPTrunc, LLVMC, SType, DAG.getDataLayout())); VT = SVT; Extend = true; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 126725f229c545..8302cbbf231aed 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -1014,7 +1014,7 @@ std::optional llvm::StripTemplateParameters(StringRef Name) { // // We look for > at the end but if it does not contain any < then we // have something like operator>>. We check for the operator<=> case. - if (!Name.endswith(">") || Name.count("<") == 0 || Name.endswith("<=>")) + if (!Name.ends_with(">") || Name.count("<") == 0 || Name.ends_with("<=>")) return {}; // How many < until we have the start of the template parameters. 
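Unlike the removed `ConstantExpr::getFPTrunc`/`getFPToUI`-style constructors, the `ConstantFoldCastOperand` calls introduced in the ValueTracking.cpp and LegalizeDAG.cpp hunks above fold the cast eagerly and can fail. A minimal sketch under that assumption (the wrapper name is hypothetical, not part of the patch):

```cpp
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"

// Hypothetical wrapper: ConstantFoldCastOperand() returns nullptr when the
// cast cannot be folded to a plain Constant, so callers must check the
// result instead of relying on a ConstantExpr being created unconditionally.
static llvm::Constant *tryFoldFPTrunc(llvm::Constant *C, llvm::Type *DestTy,
                                      const llvm::DataLayout &DL) {
  return llvm::ConstantFoldCastOperand(llvm::Instruction::FPTrunc, C, DestTy,
                                       DL);
}
```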
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 57ca11a077a486..088dffeaa2b9f6 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -2204,7 +2204,7 @@ class DWARFObjInMemory final : public DWARFObject { continue; if (!Section.relocations().empty() && Name.ends_with(".dwo") && - RelSecName.startswith(".debug")) { + RelSecName.starts_with(".debug")) { HandleWarning(createError("unexpected relocations for dwo section '" + RelSecName + "'")); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp index 9a2268a948c60a..f6adae6058e996 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp @@ -8,7 +8,7 @@ void DWARFTypePrinter::appendTypeTagName(dwarf::Tag T) { StringRef TagStr = TagString(T); static constexpr StringRef Prefix = "DW_TAG_"; static constexpr StringRef Suffix = "_type"; - if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix)) + if (!TagStr.starts_with(Prefix) || !TagStr.ends_with(Suffix)) return; OS << TagStr.substr(Prefix.size(), TagStr.size() - (Prefix.size() + Suffix.size())) @@ -178,7 +178,7 @@ DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D, Word = true; StringRef Name = NamePtr; static constexpr StringRef MangledPrefix = "_STN|"; - if (Name.startswith(MangledPrefix)) { + if (Name.starts_with(MangledPrefix)) { Name = Name.drop_front(MangledPrefix.size()); auto Separator = Name.find('|'); assert(Separator != StringRef::npos); @@ -188,12 +188,12 @@ DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D, *OriginalFullName = (BaseName + TemplateArgs).str(); Name = BaseName; } else - EndedWithTemplate = Name.endswith(">"); + EndedWithTemplate = Name.ends_with(">"); OS << Name; // This check would be insufficient for operator overloads like // "operator>>" - but for now Clang doesn't try to simplify them, so this // is OK. Add more nuanced operator overload handling here if/when needed. - if (Name.endswith(">")) + if (Name.ends_with(">")) break; if (!appendTemplateParameters(D)) break; diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index d720c1e3349551..7809fd65bfc8f9 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -156,7 +156,7 @@ getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { // Some GCC optimizations create functions with names ending with .isra. // or .part. 
and those names are just DW_AT_name, not DW_AT_linkage_name // If it looks like it could be the case, don't add any prefix - if (ShortName.startswith("_Z") && + if (ShortName.starts_with("_Z") && (ShortName.contains(".isra.") || ShortName.contains(".part."))) return Gsym.insertString(ShortName, /* Copy */ false); diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp index d1789fe587f3a5..1f6724988ae979 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp @@ -221,7 +221,7 @@ bool LVCodeViewReader::isSystemEntry(LVElement *Element, StringRef Name) const { return StringRef::npos != Name.find(String); }; auto Starts = [=](const char *Pattern) -> bool { - return Name.startswith(Pattern); + return Name.starts_with(Pattern); }; auto CheckExclude = [&]() -> bool { if (Starts("__") || Starts("_PMD") || Starts("_PMFN")) diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp index cddee3e1c273f5..328d0f5ab060fe 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -561,7 +561,7 @@ static bool isMyCode(const SymbolGroup &Group) { return true; StringRef Name = Group.name(); - if (Name.startswith("Import:")) + if (Name.starts_with("Import:")) return false; if (Name.ends_with_insensitive(".dll")) return false; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index fb0ca2f9cf229c..d5cac33d15193e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -176,7 +176,7 @@ NativeSession::searchForPdb(const PdbSearchOptions &Opts) { if (!PathOrErr) return PathOrErr.takeError(); StringRef PathFromExe = PathOrErr.get(); - sys::path::Style Style = PathFromExe.startswith("/") + sys::path::Style Style = PathFromExe.starts_with("/") ? sys::path::Style::posix : sys::path::Style::windows; StringRef PdbName = sys::path::filename(PathFromExe, Style); diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp index b71b2b15814419..941ce78027a213 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -19,7 +19,7 @@ using namespace llvm::pdb; // Corresponds to `fUDTAnon`. static bool isAnonymous(StringRef Name) { return Name == "" || Name == "__unnamed" || - Name.endswith("::") || Name.endswith("::__unnamed"); + Name.ends_with("::") || Name.ends_with("::__unnamed"); } // Computes the hash for a user-defined type record. This could be a struct, diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index d7b33ce1d0f062..716312f26e0bac 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -90,7 +90,7 @@ class SourceCode { size_t PosEnd = PrunedSource->find('\n', Pos); StringRef String = PrunedSource->substr( Pos, (PosEnd == StringRef::npos) ? 
StringRef::npos : (PosEnd - Pos)); - if (String.endswith("\r")) + if (String.ends_with("\r")) String = String.drop_back(1); OS << format_decimal(L, MaxLineNumberWidth); if (L == Line) diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index a2bc2577b70acb..f7503ef49693d8 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -552,7 +552,7 @@ std::optional MarkupFilter::parseAddr(StringRef Str) const { } if (all_of(Str, [](char C) { return C == '0'; })) return 0; - if (!Str.startswith("0x")) { + if (!Str.starts_with("0x")) { reportTypeError(Str, "address"); return std::nullopt; } @@ -741,7 +741,7 @@ uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const { } StringRef MarkupFilter::lineEnding() const { - return Line.endswith("\r\n") ? "\r\n" : "\n"; + return Line.ends_with("\r\n") ? "\r\n" : "\n"; } bool MarkupFilter::MMap::contains(uint64_t Addr) const { diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index fc3a77c8c35450..8c18be189b7919 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -732,7 +732,7 @@ StringRef demanglePE32ExternCFunc(StringRef SymbolName) { // Remove any ending '@' for vectorcall. bool IsVectorCall = false; - if (HasAtNumSuffix && SymbolName.endswith("@")) { + if (HasAtNumSuffix && SymbolName.ends_with("@")) { SymbolName = SymbolName.drop_back(); IsVectorCall = true; } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index af178daffccbc5..f67f5cbbfd29ff 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -108,60 +108,60 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "addcarry.u64" || // Added in 8.0 Name == "subborrow.u32" || // Added in 8.0 Name == "subborrow.u64" || // Added in 8.0 - Name.startswith("sse2.padds.") || // Added in 8.0 - Name.startswith("sse2.psubs.") || // Added in 8.0 - Name.startswith("sse2.paddus.") || // Added in 8.0 - Name.startswith("sse2.psubus.") || // Added in 8.0 - Name.startswith("avx2.padds.") || // Added in 8.0 - Name.startswith("avx2.psubs.") || // Added in 8.0 - Name.startswith("avx2.paddus.") || // Added in 8.0 - Name.startswith("avx2.psubus.") || // Added in 8.0 - Name.startswith("avx512.padds.") || // Added in 8.0 - Name.startswith("avx512.psubs.") || // Added in 8.0 - Name.startswith("avx512.mask.padds.") || // Added in 8.0 - Name.startswith("avx512.mask.psubs.") || // Added in 8.0 - Name.startswith("avx512.mask.paddus.") || // Added in 8.0 - Name.startswith("avx512.mask.psubus.") || // Added in 8.0 + Name.starts_with("sse2.padds.") || // Added in 8.0 + Name.starts_with("sse2.psubs.") || // Added in 8.0 + Name.starts_with("sse2.paddus.") || // Added in 8.0 + Name.starts_with("sse2.psubus.") || // Added in 8.0 + Name.starts_with("avx2.padds.") || // Added in 8.0 + Name.starts_with("avx2.psubs.") || // Added in 8.0 + Name.starts_with("avx2.paddus.") || // Added in 8.0 + Name.starts_with("avx2.psubus.") || // Added in 8.0 + Name.starts_with("avx512.padds.") || // Added in 8.0 + Name.starts_with("avx512.psubs.") || // Added in 8.0 + Name.starts_with("avx512.mask.padds.") || // Added in 8.0 + Name.starts_with("avx512.mask.psubs.") || // Added in 8.0 + Name.starts_with("avx512.mask.paddus.") || // Added in 8.0 + Name.starts_with("avx512.mask.psubus.") || // Added in 8.0 Name=="ssse3.pabs.b.128" || // Added in 6.0 
Name=="ssse3.pabs.w.128" || // Added in 6.0 Name=="ssse3.pabs.d.128" || // Added in 6.0 - Name.startswith("fma4.vfmadd.s") || // Added in 7.0 - Name.startswith("fma.vfmadd.") || // Added in 7.0 - Name.startswith("fma.vfmsub.") || // Added in 7.0 - Name.startswith("fma.vfmsubadd.") || // Added in 7.0 - Name.startswith("fma.vfnmadd.") || // Added in 7.0 - Name.startswith("fma.vfnmsub.") || // Added in 7.0 - Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0 - Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0 - Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0 - Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0 - Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0 - Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0 - Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0 - Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0 - Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0 - Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0 - Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0 - Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 - Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 - Name.startswith("avx512.kunpck") || //added in 6.0 - Name.startswith("avx2.pabs.") || // Added in 6.0 - Name.startswith("avx512.mask.pabs.") || // Added in 6.0 - Name.startswith("avx512.broadcastm") || // Added in 6.0 + Name.starts_with("fma4.vfmadd.s") || // Added in 7.0 + Name.starts_with("fma.vfmadd.") || // Added in 7.0 + Name.starts_with("fma.vfmsub.") || // Added in 7.0 + Name.starts_with("fma.vfmsubadd.") || // Added in 7.0 + Name.starts_with("fma.vfnmadd.") || // Added in 7.0 + Name.starts_with("fma.vfnmsub.") || // Added in 7.0 + Name.starts_with("avx512.mask.vfmadd.") || // Added in 7.0 + Name.starts_with("avx512.mask.vfnmadd.") || // Added in 7.0 + Name.starts_with("avx512.mask.vfnmsub.") || // Added in 7.0 + Name.starts_with("avx512.mask3.vfmadd.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vfmadd.") || // Added in 7.0 + Name.starts_with("avx512.mask3.vfmsub.") || // Added in 7.0 + Name.starts_with("avx512.mask3.vfnmsub.") || // Added in 7.0 + Name.starts_with("avx512.mask.vfmaddsub.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vfmaddsub.") || // Added in 7.0 + Name.starts_with("avx512.mask3.vfmaddsub.") || // Added in 7.0 + Name.starts_with("avx512.mask3.vfmsubadd.") || // Added in 7.0 + Name.starts_with("avx512.mask.shuf.i") || // Added in 6.0 + Name.starts_with("avx512.mask.shuf.f") || // Added in 6.0 + Name.starts_with("avx512.kunpck") || //added in 6.0 + Name.starts_with("avx2.pabs.") || // Added in 6.0 + Name.starts_with("avx512.mask.pabs.") || // Added in 6.0 + Name.starts_with("avx512.broadcastm") || // Added in 6.0 Name == "sse.sqrt.ss" || // Added in 7.0 Name == "sse2.sqrt.sd" || // Added in 7.0 - Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0 - Name.startswith("avx.sqrt.p") || // Added in 7.0 - Name.startswith("sse2.sqrt.p") || // Added in 7.0 - Name.startswith("sse.sqrt.p") || // Added in 7.0 - Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 - Name.startswith("sse2.pcmpeq.") || // Added in 3.1 - Name.startswith("sse2.pcmpgt.") || // Added in 3.1 - Name.startswith("avx2.pcmpeq.") || // Added in 3.1 - Name.startswith("avx2.pcmpgt.") || // Added in 3.1 - Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9 - Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9 - Name.startswith("avx.vperm2f128.") || // Added in 6.0 + 
Name.starts_with("avx512.mask.sqrt.p") || // Added in 7.0 + Name.starts_with("avx.sqrt.p") || // Added in 7.0 + Name.starts_with("sse2.sqrt.p") || // Added in 7.0 + Name.starts_with("sse.sqrt.p") || // Added in 7.0 + Name.starts_with("avx512.mask.pbroadcast") || // Added in 6.0 + Name.starts_with("sse2.pcmpeq.") || // Added in 3.1 + Name.starts_with("sse2.pcmpgt.") || // Added in 3.1 + Name.starts_with("avx2.pcmpeq.") || // Added in 3.1 + Name.starts_with("avx2.pcmpgt.") || // Added in 3.1 + Name.starts_with("avx512.mask.pcmpeq.") || // Added in 3.9 + Name.starts_with("avx512.mask.pcmpgt.") || // Added in 3.9 + Name.starts_with("avx.vperm2f128.") || // Added in 6.0 Name == "avx2.vperm2i128" || // Added in 6.0 Name == "sse.add.ss" || // Added in 4.0 Name == "sse2.add.sd" || // Added in 4.0 @@ -191,48 +191,48 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "avx512.kxnor.w" || // Added in 7.0 Name == "avx512.kortestc.w" || // Added in 7.0 Name == "avx512.kortestz.w" || // Added in 7.0 - Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0 - Name.startswith("avx2.pmax") || // Added in 3.9 - Name.startswith("avx2.pmin") || // Added in 3.9 - Name.startswith("avx512.mask.pmax") || // Added in 4.0 - Name.startswith("avx512.mask.pmin") || // Added in 4.0 - Name.startswith("avx2.vbroadcast") || // Added in 3.8 - Name.startswith("avx2.pbroadcast") || // Added in 3.8 - Name.startswith("avx.vpermil.") || // Added in 3.1 - Name.startswith("sse2.pshuf") || // Added in 3.9 - Name.startswith("avx512.pbroadcast") || // Added in 3.9 - Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9 - Name.startswith("avx512.mask.movddup") || // Added in 3.9 - Name.startswith("avx512.mask.movshdup") || // Added in 3.9 - Name.startswith("avx512.mask.movsldup") || // Added in 3.9 - Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9 - Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9 - Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9 - Name.startswith("avx512.mask.shuf.p") || // Added in 4.0 - Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9 - Name.startswith("avx512.mask.perm.df.") || // Added in 3.9 - Name.startswith("avx512.mask.perm.di.") || // Added in 3.9 - Name.startswith("avx512.mask.punpckl") || // Added in 3.9 - Name.startswith("avx512.mask.punpckh") || // Added in 3.9 - Name.startswith("avx512.mask.unpckl.") || // Added in 3.9 - Name.startswith("avx512.mask.unpckh.") || // Added in 3.9 - Name.startswith("avx512.mask.pand.") || // Added in 3.9 - Name.startswith("avx512.mask.pandn.") || // Added in 3.9 - Name.startswith("avx512.mask.por.") || // Added in 3.9 - Name.startswith("avx512.mask.pxor.") || // Added in 3.9 - Name.startswith("avx512.mask.and.") || // Added in 3.9 - Name.startswith("avx512.mask.andn.") || // Added in 3.9 - Name.startswith("avx512.mask.or.") || // Added in 3.9 - Name.startswith("avx512.mask.xor.") || // Added in 3.9 - Name.startswith("avx512.mask.padd.") || // Added in 4.0 - Name.startswith("avx512.mask.psub.") || // Added in 4.0 - Name.startswith("avx512.mask.pmull.") || // Added in 4.0 - Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 - Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 - Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0 - Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0 - Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0 - Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0 + 
Name.starts_with("avx512.mask.pshuf.b.") || // Added in 4.0 + Name.starts_with("avx2.pmax") || // Added in 3.9 + Name.starts_with("avx2.pmin") || // Added in 3.9 + Name.starts_with("avx512.mask.pmax") || // Added in 4.0 + Name.starts_with("avx512.mask.pmin") || // Added in 4.0 + Name.starts_with("avx2.vbroadcast") || // Added in 3.8 + Name.starts_with("avx2.pbroadcast") || // Added in 3.8 + Name.starts_with("avx.vpermil.") || // Added in 3.1 + Name.starts_with("sse2.pshuf") || // Added in 3.9 + Name.starts_with("avx512.pbroadcast") || // Added in 3.9 + Name.starts_with("avx512.mask.broadcast.s") || // Added in 3.9 + Name.starts_with("avx512.mask.movddup") || // Added in 3.9 + Name.starts_with("avx512.mask.movshdup") || // Added in 3.9 + Name.starts_with("avx512.mask.movsldup") || // Added in 3.9 + Name.starts_with("avx512.mask.pshuf.d.") || // Added in 3.9 + Name.starts_with("avx512.mask.pshufl.w.") || // Added in 3.9 + Name.starts_with("avx512.mask.pshufh.w.") || // Added in 3.9 + Name.starts_with("avx512.mask.shuf.p") || // Added in 4.0 + Name.starts_with("avx512.mask.vpermil.p") || // Added in 3.9 + Name.starts_with("avx512.mask.perm.df.") || // Added in 3.9 + Name.starts_with("avx512.mask.perm.di.") || // Added in 3.9 + Name.starts_with("avx512.mask.punpckl") || // Added in 3.9 + Name.starts_with("avx512.mask.punpckh") || // Added in 3.9 + Name.starts_with("avx512.mask.unpckl.") || // Added in 3.9 + Name.starts_with("avx512.mask.unpckh.") || // Added in 3.9 + Name.starts_with("avx512.mask.pand.") || // Added in 3.9 + Name.starts_with("avx512.mask.pandn.") || // Added in 3.9 + Name.starts_with("avx512.mask.por.") || // Added in 3.9 + Name.starts_with("avx512.mask.pxor.") || // Added in 3.9 + Name.starts_with("avx512.mask.and.") || // Added in 3.9 + Name.starts_with("avx512.mask.andn.") || // Added in 3.9 + Name.starts_with("avx512.mask.or.") || // Added in 3.9 + Name.starts_with("avx512.mask.xor.") || // Added in 3.9 + Name.starts_with("avx512.mask.padd.") || // Added in 4.0 + Name.starts_with("avx512.mask.psub.") || // Added in 4.0 + Name.starts_with("avx512.mask.pmull.") || // Added in 4.0 + Name.starts_with("avx512.mask.cvtdq2pd.") || // Added in 4.0 + Name.starts_with("avx512.mask.cvtudq2pd.") || // Added in 4.0 + Name.starts_with("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0 + Name.starts_with("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0 + Name.starts_with("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0 + Name.starts_with("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0 Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0 Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0 @@ -247,91 +247,91 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0 Name == "avx512.cvtusi2sd" || // Added in 7.0 - Name.startswith("avx512.mask.permvar.") || // Added in 7.0 + Name.starts_with("avx512.mask.permvar.") || // Added in 7.0 Name == "sse2.pmulu.dq" || // Added in 7.0 Name == "sse41.pmuldq" || // Added in 7.0 Name == "avx2.pmulu.dq" || // Added in 7.0 Name == "avx2.pmul.dq" || // Added in 7.0 Name == "avx512.pmulu.dq.512" || // Added in 7.0 Name == "avx512.pmul.dq.512" || // Added in 7.0 - Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 - Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 - Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 - 
Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0 - Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0 - Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0 - Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0 - Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 - Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 - Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 - Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 - Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 - Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 - Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 - Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 - Name.startswith("avx512.cmp.p") || // Added in 12.0 - Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 - Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 - Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 - Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 - Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 - Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 - Name.startswith("avx512.mask.psll.d") || // Added in 4.0 - Name.startswith("avx512.mask.psll.q") || // Added in 4.0 - Name.startswith("avx512.mask.psll.w") || // Added in 4.0 - Name.startswith("avx512.mask.psra.d") || // Added in 4.0 - Name.startswith("avx512.mask.psra.q") || // Added in 4.0 - Name.startswith("avx512.mask.psra.w") || // Added in 4.0 - Name.startswith("avx512.mask.psrl.d") || // Added in 4.0 - Name.startswith("avx512.mask.psrl.q") || // Added in 4.0 - Name.startswith("avx512.mask.psrl.w") || // Added in 4.0 - Name.startswith("avx512.mask.pslli") || // Added in 4.0 - Name.startswith("avx512.mask.psrai") || // Added in 4.0 - Name.startswith("avx512.mask.psrli") || // Added in 4.0 - Name.startswith("avx512.mask.psllv") || // Added in 4.0 - Name.startswith("avx512.mask.psrav") || // Added in 4.0 - Name.startswith("avx512.mask.psrlv") || // Added in 4.0 - Name.startswith("sse41.pmovsx") || // Added in 3.8 - Name.startswith("sse41.pmovzx") || // Added in 3.9 - Name.startswith("avx2.pmovsx") || // Added in 3.9 - Name.startswith("avx2.pmovzx") || // Added in 3.9 - Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 - Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 - Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 - Name.startswith("avx512.mask.pternlog.") || // Added in 7.0 - Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 - Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 - Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 - Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0 - Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0 - Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0 - Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0 - Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0 - Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0 - Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0 - Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0 - Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0 - Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0 - Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0 - Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0 - Name.startswith("avx512.mask.vpshld.") || // Added in 7.0 - Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0 - 
Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0 - Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0 - Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0 - Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0 - Name.startswith("avx512.vpshld.") || // Added in 8.0 - Name.startswith("avx512.vpshrd.") || // Added in 8.0 - Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 - Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 - Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 - Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 - Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 - Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0 - Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 - Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0 - Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0 - Name.startswith("avx512.mask.conflict.") || // Added in 9.0 + Name.starts_with("avx512.mask.pmul.dq.") || // Added in 4.0 + Name.starts_with("avx512.mask.pmulu.dq.") || // Added in 4.0 + Name.starts_with("avx512.mask.pmul.hr.sw.") || // Added in 7.0 + Name.starts_with("avx512.mask.pmulh.w.") || // Added in 7.0 + Name.starts_with("avx512.mask.pmulhu.w.") || // Added in 7.0 + Name.starts_with("avx512.mask.pmaddw.d.") || // Added in 7.0 + Name.starts_with("avx512.mask.pmaddubs.w.") || // Added in 7.0 + Name.starts_with("avx512.mask.packsswb.") || // Added in 5.0 + Name.starts_with("avx512.mask.packssdw.") || // Added in 5.0 + Name.starts_with("avx512.mask.packuswb.") || // Added in 5.0 + Name.starts_with("avx512.mask.packusdw.") || // Added in 5.0 + Name.starts_with("avx512.mask.cmp.b") || // Added in 5.0 + Name.starts_with("avx512.mask.cmp.d") || // Added in 5.0 + Name.starts_with("avx512.mask.cmp.q") || // Added in 5.0 + Name.starts_with("avx512.mask.cmp.w") || // Added in 5.0 + Name.starts_with("avx512.cmp.p") || // Added in 12.0 + Name.starts_with("avx512.mask.ucmp.") || // Added in 5.0 + Name.starts_with("avx512.cvtb2mask.") || // Added in 7.0 + Name.starts_with("avx512.cvtw2mask.") || // Added in 7.0 + Name.starts_with("avx512.cvtd2mask.") || // Added in 7.0 + Name.starts_with("avx512.cvtq2mask.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpermilvar.") || // Added in 4.0 + Name.starts_with("avx512.mask.psll.d") || // Added in 4.0 + Name.starts_with("avx512.mask.psll.q") || // Added in 4.0 + Name.starts_with("avx512.mask.psll.w") || // Added in 4.0 + Name.starts_with("avx512.mask.psra.d") || // Added in 4.0 + Name.starts_with("avx512.mask.psra.q") || // Added in 4.0 + Name.starts_with("avx512.mask.psra.w") || // Added in 4.0 + Name.starts_with("avx512.mask.psrl.d") || // Added in 4.0 + Name.starts_with("avx512.mask.psrl.q") || // Added in 4.0 + Name.starts_with("avx512.mask.psrl.w") || // Added in 4.0 + Name.starts_with("avx512.mask.pslli") || // Added in 4.0 + Name.starts_with("avx512.mask.psrai") || // Added in 4.0 + Name.starts_with("avx512.mask.psrli") || // Added in 4.0 + Name.starts_with("avx512.mask.psllv") || // Added in 4.0 + Name.starts_with("avx512.mask.psrav") || // Added in 4.0 + Name.starts_with("avx512.mask.psrlv") || // Added in 4.0 + Name.starts_with("sse41.pmovsx") || // Added in 3.8 + Name.starts_with("sse41.pmovzx") || // Added in 3.9 + Name.starts_with("avx2.pmovsx") || // Added in 3.9 + Name.starts_with("avx2.pmovzx") || // Added in 3.9 + Name.starts_with("avx512.mask.pmovsx") || 
// Added in 4.0 + Name.starts_with("avx512.mask.pmovzx") || // Added in 4.0 + Name.starts_with("avx512.mask.lzcnt.") || // Added in 5.0 + Name.starts_with("avx512.mask.pternlog.") || // Added in 7.0 + Name.starts_with("avx512.maskz.pternlog.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpmadd52") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpmadd52") || // Added in 7.0 + Name.starts_with("avx512.mask.vpermi2var.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpermt2var.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpermt2var.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpdpbusd.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpdpbusd.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpdpbusds.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpdpbusds.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpdpwssd.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpdpwssd.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpdpwssds.") || // Added in 7.0 + Name.starts_with("avx512.maskz.vpdpwssds.") || // Added in 7.0 + Name.starts_with("avx512.mask.dbpsadbw.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpshld.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpshrd.") || // Added in 7.0 + Name.starts_with("avx512.mask.vpshldv.") || // Added in 8.0 + Name.starts_with("avx512.mask.vpshrdv.") || // Added in 8.0 + Name.starts_with("avx512.maskz.vpshldv.") || // Added in 8.0 + Name.starts_with("avx512.maskz.vpshrdv.") || // Added in 8.0 + Name.starts_with("avx512.vpshld.") || // Added in 8.0 + Name.starts_with("avx512.vpshrd.") || // Added in 8.0 + Name.starts_with("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 + Name.starts_with("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 + Name.starts_with("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 + Name.starts_with("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 + Name.starts_with("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 + Name.starts_with("avx512.mask.min.p") || // Added in 7.0. 
128/256 in 5.0 + Name.starts_with("avx512.mask.fpclass.p") || // Added in 7.0 + Name.starts_with("avx512.mask.vpshufbitqmb.") || // Added in 8.0 + Name.starts_with("avx512.mask.pmultishift.qb.") || // Added in 8.0 + Name.starts_with("avx512.mask.conflict.") || // Added in 9.0 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0 @@ -347,85 +347,85 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "avx.cvtdq2.pd.256" || // Added in 3.9 Name == "avx.cvtdq2.ps.256" || // Added in 7.0 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 - Name.startswith("vcvtph2ps.") || // Added in 11.0 - Name.startswith("avx.vinsertf128.") || // Added in 3.7 + Name.starts_with("vcvtph2ps.") || // Added in 11.0 + Name.starts_with("avx.vinsertf128.") || // Added in 3.7 Name == "avx2.vinserti128" || // Added in 3.7 - Name.startswith("avx512.mask.insert") || // Added in 4.0 - Name.startswith("avx.vextractf128.") || // Added in 3.7 + Name.starts_with("avx512.mask.insert") || // Added in 4.0 + Name.starts_with("avx.vextractf128.") || // Added in 3.7 Name == "avx2.vextracti128" || // Added in 3.7 - Name.startswith("avx512.mask.vextract") || // Added in 4.0 - Name.startswith("sse4a.movnt.") || // Added in 3.9 - Name.startswith("avx.movnt.") || // Added in 3.2 - Name.startswith("avx512.storent.") || // Added in 3.9 + Name.starts_with("avx512.mask.vextract") || // Added in 4.0 + Name.starts_with("sse4a.movnt.") || // Added in 3.9 + Name.starts_with("avx.movnt.") || // Added in 3.2 + Name.starts_with("avx512.storent.") || // Added in 3.9 Name == "sse41.movntdqa" || // Added in 5.0 Name == "avx2.movntdqa" || // Added in 5.0 Name == "avx512.movntdqa" || // Added in 5.0 Name == "sse2.storel.dq" || // Added in 3.9 - Name.startswith("sse.storeu.") || // Added in 3.9 - Name.startswith("sse2.storeu.") || // Added in 3.9 - Name.startswith("avx.storeu.") || // Added in 3.9 - Name.startswith("avx512.mask.storeu.") || // Added in 3.9 - Name.startswith("avx512.mask.store.p") || // Added in 3.9 - Name.startswith("avx512.mask.store.b.") || // Added in 3.9 - Name.startswith("avx512.mask.store.w.") || // Added in 3.9 - Name.startswith("avx512.mask.store.d.") || // Added in 3.9 - Name.startswith("avx512.mask.store.q.") || // Added in 3.9 + Name.starts_with("sse.storeu.") || // Added in 3.9 + Name.starts_with("sse2.storeu.") || // Added in 3.9 + Name.starts_with("avx.storeu.") || // Added in 3.9 + Name.starts_with("avx512.mask.storeu.") || // Added in 3.9 + Name.starts_with("avx512.mask.store.p") || // Added in 3.9 + Name.starts_with("avx512.mask.store.b.") || // Added in 3.9 + Name.starts_with("avx512.mask.store.w.") || // Added in 3.9 + Name.starts_with("avx512.mask.store.d.") || // Added in 3.9 + Name.starts_with("avx512.mask.store.q.") || // Added in 3.9 Name == "avx512.mask.store.ss" || // Added in 7.0 - Name.startswith("avx512.mask.loadu.") || // Added in 3.9 - Name.startswith("avx512.mask.load.") || // Added in 3.9 - Name.startswith("avx512.mask.expand.load.") || // Added in 7.0 - Name.startswith("avx512.mask.compress.store.") || // Added in 7.0 - Name.startswith("avx512.mask.expand.b") || // Added in 9.0 - Name.startswith("avx512.mask.expand.w") || // Added in 9.0 - Name.startswith("avx512.mask.expand.d") || // Added in 9.0 - Name.startswith("avx512.mask.expand.q") || // Added in 9.0 - Name.startswith("avx512.mask.expand.p") || // Added in 9.0 - Name.startswith("avx512.mask.compress.b") || // Added in 
9.0 - Name.startswith("avx512.mask.compress.w") || // Added in 9.0 - Name.startswith("avx512.mask.compress.d") || // Added in 9.0 - Name.startswith("avx512.mask.compress.q") || // Added in 9.0 - Name.startswith("avx512.mask.compress.p") || // Added in 9.0 + Name.starts_with("avx512.mask.loadu.") || // Added in 3.9 + Name.starts_with("avx512.mask.load.") || // Added in 3.9 + Name.starts_with("avx512.mask.expand.load.") || // Added in 7.0 + Name.starts_with("avx512.mask.compress.store.") || // Added in 7.0 + Name.starts_with("avx512.mask.expand.b") || // Added in 9.0 + Name.starts_with("avx512.mask.expand.w") || // Added in 9.0 + Name.starts_with("avx512.mask.expand.d") || // Added in 9.0 + Name.starts_with("avx512.mask.expand.q") || // Added in 9.0 + Name.starts_with("avx512.mask.expand.p") || // Added in 9.0 + Name.starts_with("avx512.mask.compress.b") || // Added in 9.0 + Name.starts_with("avx512.mask.compress.w") || // Added in 9.0 + Name.starts_with("avx512.mask.compress.d") || // Added in 9.0 + Name.starts_with("avx512.mask.compress.q") || // Added in 9.0 + Name.starts_with("avx512.mask.compress.p") || // Added in 9.0 Name == "sse42.crc32.64.8" || // Added in 3.4 - Name.startswith("avx.vbroadcast.s") || // Added in 3.5 - Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 - Name.startswith("avx512.mask.palignr.") || // Added in 3.9 - Name.startswith("avx512.mask.valign.") || // Added in 4.0 - Name.startswith("sse2.psll.dq") || // Added in 3.7 - Name.startswith("sse2.psrl.dq") || // Added in 3.7 - Name.startswith("avx2.psll.dq") || // Added in 3.7 - Name.startswith("avx2.psrl.dq") || // Added in 3.7 - Name.startswith("avx512.psll.dq") || // Added in 3.9 - Name.startswith("avx512.psrl.dq") || // Added in 3.9 + Name.starts_with("avx.vbroadcast.s") || // Added in 3.5 + Name.starts_with("avx512.vbroadcast.s") || // Added in 7.0 + Name.starts_with("avx512.mask.palignr.") || // Added in 3.9 + Name.starts_with("avx512.mask.valign.") || // Added in 4.0 + Name.starts_with("sse2.psll.dq") || // Added in 3.7 + Name.starts_with("sse2.psrl.dq") || // Added in 3.7 + Name.starts_with("avx2.psll.dq") || // Added in 3.7 + Name.starts_with("avx2.psrl.dq") || // Added in 3.7 + Name.starts_with("avx512.psll.dq") || // Added in 3.9 + Name.starts_with("avx512.psrl.dq") || // Added in 3.9 Name == "sse41.pblendw" || // Added in 3.7 - Name.startswith("sse41.blendp") || // Added in 3.7 - Name.startswith("avx.blend.p") || // Added in 3.7 + Name.starts_with("sse41.blendp") || // Added in 3.7 + Name.starts_with("avx.blend.p") || // Added in 3.7 Name == "avx2.pblendw" || // Added in 3.7 - Name.startswith("avx2.pblendd.") || // Added in 3.7 - Name.startswith("avx.vbroadcastf128") || // Added in 4.0 + Name.starts_with("avx2.pblendd.") || // Added in 3.7 + Name.starts_with("avx.vbroadcastf128") || // Added in 4.0 Name == "avx2.vbroadcasti128" || // Added in 3.7 - Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0 - Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcastf32x4.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcastf64x2.") 
|| // Added in 6.0 + Name.starts_with("avx512.mask.broadcastf32x8.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcastf64x4.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcasti32x4.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcasti64x2.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcasti32x8.") || // Added in 6.0 + Name.starts_with("avx512.mask.broadcasti64x4.") || // Added in 6.0 Name == "xop.vpcmov" || // Added in 3.8 Name == "xop.vpcmov.256" || // Added in 5.0 - Name.startswith("avx512.mask.move.s") || // Added in 4.0 - Name.startswith("avx512.cvtmask2") || // Added in 5.0 - Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0 - Name.startswith("xop.vprot") || // Added in 8.0 - Name.startswith("avx512.prol") || // Added in 8.0 - Name.startswith("avx512.pror") || // Added in 8.0 - Name.startswith("avx512.mask.prorv.") || // Added in 8.0 - Name.startswith("avx512.mask.pror.") || // Added in 8.0 - Name.startswith("avx512.mask.prolv.") || // Added in 8.0 - Name.startswith("avx512.mask.prol.") || // Added in 8.0 - Name.startswith("avx512.ptestm") || //Added in 6.0 - Name.startswith("avx512.ptestnm") || //Added in 6.0 - Name.startswith("avx512.mask.pavg")) // Added in 6.0 + Name.starts_with("avx512.mask.move.s") || // Added in 4.0 + Name.starts_with("avx512.cvtmask2") || // Added in 5.0 + Name.starts_with("xop.vpcom") || // Added in 3.2, Updated in 9.0 + Name.starts_with("xop.vprot") || // Added in 8.0 + Name.starts_with("avx512.prol") || // Added in 8.0 + Name.starts_with("avx512.pror") || // Added in 8.0 + Name.starts_with("avx512.mask.prorv.") || // Added in 8.0 + Name.starts_with("avx512.mask.pror.") || // Added in 8.0 + Name.starts_with("avx512.mask.prolv.") || // Added in 8.0 + Name.starts_with("avx512.mask.prol.") || // Added in 8.0 + Name.starts_with("avx512.ptestm") || //Added in 6.0 + Name.starts_with("avx512.ptestnm") || //Added in 6.0 + Name.starts_with("avx512.mask.pavg")) // Added in 6.0 return true; return false; @@ -434,7 +434,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn) { // Only handle intrinsics that start with "x86.". - if (!Name.startswith("x86.")) + if (!Name.starts_with("x86.")) return false; // Remove "x86." prefix. Name = Name.substr(4); @@ -456,7 +456,7 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, } // SSE4.1 ptest functions may have an old signature. - if (Name.startswith("sse41.ptest")) { // Added in 3.2 + if (Name.starts_with("sse41.ptest")) { // Added in 3.2 if (Name.substr(11) == "c") return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn); if (Name.substr(11) == "z") @@ -504,20 +504,20 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, NewFn); // frcz.ss/sd may need to have an argument dropped. Added in 3.2 - if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { + if (Name.starts_with("xop.vfrcz.ss") && F->arg_size() == 2) { rename(F); NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_xop_vfrcz_ss); return true; } - if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) { + if (Name.starts_with("xop.vfrcz.sd") && F->arg_size() == 2) { rename(F); NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_xop_vfrcz_sd); return true; } // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 
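
The x86 upgrade matching above follows a single strip-then-match shape: drop a known prefix, then probe the remainder. A hedged sketch of that pattern, using a hypothetical helper name:

    // Hypothetical helper, not part of the patch: consume_front both
    // tests for the prefix and removes it from Name on success.
    static bool isOldStylePtest(llvm::StringRef Name) {
      if (!Name.consume_front("x86."))
        return false;
      return Name.starts_with("sse41.ptest"); // ptestc / ptestz / ptestnzc
    }
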
- if (Name.startswith("xop.vpermil2")) { // Added in 3.9 + if (Name.starts_with("xop.vpermil2")) { // Added in 3.9 auto Idx = F->getFunctionType()->getParamType(2); if (Idx->isFPOrFPVectorTy()) { rename(F); @@ -630,24 +630,24 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Quickly eliminate it, if it's not a candidate. StringRef Name = F->getName(); - if (Name.size() <= 7 || !Name.startswith("llvm.")) + if (Name.size() <= 7 || !Name.starts_with("llvm.")) return false; Name = Name.substr(5); // Strip off "llvm." switch (Name[0]) { default: break; case 'a': { - if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) { + if (Name.starts_with("arm.rbit") || Name.starts_with("aarch64.rbit")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType()); return true; } - if (Name.startswith("aarch64.neon.frintn")) { + if (Name.starts_with("aarch64.neon.frintn")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType()); return true; } - if (Name.startswith("aarch64.neon.rbit")) { + if (Name.starts_with("aarch64.neon.rbit")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType()); return true; @@ -683,13 +683,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty); return true; } - if (Name.startswith("aarch64.sve.tuple.get")) { + if (Name.starts_with("aarch64.sve.tuple.get")) { Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::vector_extract, Tys); return true; } - if (Name.startswith("aarch64.sve.tuple.set")) { + if (Name.starts_with("aarch64.sve.tuple.set")) { auto Args = F->getFunctionType()->params(); Type *Tys[] = {Args[0], Args[2], Args[1]}; NewFn = Intrinsic::getDeclaration(F->getParent(), @@ -705,7 +705,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Intrinsic::vector_insert, Tys); return true; } - if (Name.startswith("arm.neon.vclz")) { + if (Name.starts_with("arm.neon.vclz")) { Type* args[2] = { F->arg_begin()->getType(), Type::getInt1Ty(F->getContext()) @@ -718,7 +718,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { "llvm.ctlz." 
+ Name.substr(14), F->getParent()); return true; } - if (Name.startswith("arm.neon.vcnt")) { + if (Name.starts_with("arm.neon.vcnt")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, F->arg_begin()->getType()); return true; @@ -749,27 +749,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); return true; } - if (Name.startswith("arm.neon.vqadds.")) { + if (Name.starts_with("arm.neon.vqadds.")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat, F->arg_begin()->getType()); return true; } - if (Name.startswith("arm.neon.vqaddu.")) { + if (Name.starts_with("arm.neon.vqaddu.")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat, F->arg_begin()->getType()); return true; } - if (Name.startswith("arm.neon.vqsubs.")) { + if (Name.starts_with("arm.neon.vqsubs.")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat, F->arg_begin()->getType()); return true; } - if (Name.startswith("arm.neon.vqsubu.")) { + if (Name.starts_with("arm.neon.vqsubu.")) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat, F->arg_begin()->getType()); return true; } - if (Name.startswith("aarch64.neon.addp")) { + if (Name.starts_with("aarch64.neon.addp")) { if (F->arg_size() != 2) break; // Invalid IR. VectorType *Ty = dyn_cast(F->getReturnType()); @@ -782,9 +782,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8 // respectively - if ((Name.startswith("arm.neon.bfdot.") || - Name.startswith("aarch64.neon.bfdot.")) && - Name.endswith("i8")) { + if ((Name.starts_with("arm.neon.bfdot.") || + Name.starts_with("aarch64.neon.bfdot.")) && + Name.ends_with("i8")) { Intrinsic::ID IID = StringSwitch(Name) .Cases("arm.neon.bfdot.v2f32.v8i8", @@ -811,9 +811,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore // and accept v8bf16 instead of v16i8 - if ((Name.startswith("arm.neon.bfm") || - Name.startswith("aarch64.neon.bfm")) && - Name.endswith(".v4f32.v16i8")) { + if ((Name.starts_with("arm.neon.bfm") || + Name.starts_with("aarch64.neon.bfm")) && + Name.ends_with(".v4f32.v16i8")) { Intrinsic::ID IID = StringSwitch(Name) .Case("arm.neon.bfmmla.v4f32.v16i8", @@ -873,14 +873,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } - if (Name.startswith("atomic.inc") || Name.startswith("atomic.dec")) { + if (Name.starts_with("atomic.inc") || Name.starts_with("atomic.dec")) { // This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no // new declaration. 
NewFn = nullptr; return true; } - if (Name.startswith("ldexp.")) { + if (Name.starts_with("ldexp.")) { // Target specific intrinsic became redundant NewFn = Intrinsic::getDeclaration( F->getParent(), Intrinsic::ldexp, @@ -892,13 +892,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { break; } case 'c': { - if (Name.startswith("ctlz.") && F->arg_size() == 1) { + if (Name.starts_with("ctlz.") && F->arg_size() == 1) { rename(F); NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, F->arg_begin()->getType()); return true; } - if (Name.startswith("cttz.") && F->arg_size() == 1) { + if (Name.starts_with("cttz.") && F->arg_size() == 1) { rename(F); NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, F->arg_begin()->getType()); @@ -985,14 +985,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; // No other 'e*'. case 'f': - if (Name.startswith("flt.rounds")) { + if (Name.starts_with("flt.rounds")) { rename(F); NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding); return true; } break; case 'i': - if (Name.startswith("invariant.group.barrier")) { + if (Name.starts_with("invariant.group.barrier")) { // Rename invariant.group.barrier to launder.invariant.group auto Args = F->getFunctionType()->params(); Type* ObjectPtr[1] = {Args[0]}; @@ -1019,7 +1019,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } - if (Name.startswith("memset.") && F->arg_size() == 5) { + if (Name.starts_with("memset.") && F->arg_size() == 5) { rename(F); // Get the types of dest, and len const auto *FT = F->getFunctionType(); @@ -1074,7 +1074,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Expand = Name == "i" || Name == "ll" || Name == "ui" || Name == "ull"; else if (Name.consume_front("atomic.load.add.")) // nvvm.atomic.load.add.{f32.p,f64.p} - Expand = Name.startswith("f32.p") || Name.startswith("f64.p"); + Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p"); else Expand = false; @@ -1089,7 +1089,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { case 'o': // We only need to change the name to match the mangling including the // address space. 
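
The surrounding function first switches on Name[0] so that each case only needs a handful of starts_with probes. Sketched in isolation (illustrative helper under stated assumptions, not the function's actual structure):

    // Illustrative only: the first-character dispatch prunes the search
    // before any prefix comparison runs.
    static bool mightBeCountZeros(llvm::StringRef Name) {
      if (!Name.consume_front("llvm."))
        return false;
      if (Name.empty() || Name[0] != 'c')
        return false;
      return Name.starts_with("ctlz.") || Name.starts_with("cttz.");
    }
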
- if (Name.startswith("objectsize.")) { + if (Name.starts_with("objectsize.")) { Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; if (F->arg_size() == 2 || F->arg_size() == 3 || F->getName() != @@ -1103,7 +1103,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { break; case 'p': - if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) { + if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) { rename(F); NewFn = Intrinsic::getDeclaration( F->getParent(), Intrinsic::ptr_annotation, @@ -1819,7 +1819,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); unsigned EltWidth = CI.getType()->getScalarSizeInBits(); Intrinsic::ID IID; - if (Name.startswith("max.p")) { + if (Name.starts_with("max.p")) { if (VecWidth == 128 && EltWidth == 32) IID = Intrinsic::x86_sse_max_ps; else if (VecWidth == 128 && EltWidth == 64) @@ -1830,7 +1830,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx_max_pd_256; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("min.p")) { + } else if (Name.starts_with("min.p")) { if (VecWidth == 128 && EltWidth == 32) IID = Intrinsic::x86_sse_min_ps; else if (VecWidth == 128 && EltWidth == 64) @@ -1841,7 +1841,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx_min_pd_256; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pshuf.b.")) { + } else if (Name.starts_with("pshuf.b.")) { if (VecWidth == 128) IID = Intrinsic::x86_ssse3_pshuf_b_128; else if (VecWidth == 256) @@ -1850,7 +1850,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pshuf_b_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmul.hr.sw.")) { + } else if (Name.starts_with("pmul.hr.sw.")) { if (VecWidth == 128) IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; else if (VecWidth == 256) @@ -1859,7 +1859,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmul_hr_sw_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmulh.w.")) { + } else if (Name.starts_with("pmulh.w.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_pmulh_w; else if (VecWidth == 256) @@ -1868,7 +1868,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmulh_w_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmulhu.w.")) { + } else if (Name.starts_with("pmulhu.w.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_pmulhu_w; else if (VecWidth == 256) @@ -1877,7 +1877,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmulhu_w_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmaddw.d.")) { + } else if (Name.starts_with("pmaddw.d.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_pmadd_wd; else if (VecWidth == 256) @@ -1886,7 +1886,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmaddw_d_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmaddubs.w.")) { + } else if (Name.starts_with("pmaddubs.w.")) { if (VecWidth == 128) IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; else if (VecWidth == 256) @@ -1895,7 +1895,7 @@ static 
bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmaddubs_w_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("packsswb.")) { + } else if (Name.starts_with("packsswb.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_packsswb_128; else if (VecWidth == 256) @@ -1904,7 +1904,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_packsswb_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("packssdw.")) { + } else if (Name.starts_with("packssdw.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_packssdw_128; else if (VecWidth == 256) @@ -1913,7 +1913,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_packssdw_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("packuswb.")) { + } else if (Name.starts_with("packuswb.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_packuswb_128; else if (VecWidth == 256) @@ -1922,7 +1922,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_packuswb_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("packusdw.")) { + } else if (Name.starts_with("packusdw.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse41_packusdw; else if (VecWidth == 256) @@ -1931,7 +1931,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_packusdw_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("vpermilvar.")) { + } else if (Name.starts_with("vpermilvar.")) { if (VecWidth == 128 && EltWidth == 32) IID = Intrinsic::x86_avx_vpermilvar_ps; else if (VecWidth == 128 && EltWidth == 64) @@ -1956,7 +1956,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_sse2_cvttps2dq; } else if (Name == "cvttps2dq.256") { IID = Intrinsic::x86_avx_cvtt_ps2dq_256; - } else if (Name.startswith("permvar.")) { + } else if (Name.starts_with("permvar.")) { bool IsFloat = CI.getType()->isFPOrFPVectorTy(); if (VecWidth == 256 && EltWidth == 32 && IsFloat) IID = Intrinsic::x86_avx2_permps; @@ -1988,7 +1988,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_permvar_qi_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("dbpsadbw.")) { + } else if (Name.starts_with("dbpsadbw.")) { if (VecWidth == 128) IID = Intrinsic::x86_avx512_dbpsadbw_128; else if (VecWidth == 256) @@ -1997,7 +1997,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_dbpsadbw_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmultishift.qb.")) { + } else if (Name.starts_with("pmultishift.qb.")) { if (VecWidth == 128) IID = Intrinsic::x86_avx512_pmultishift_qb_128; else if (VecWidth == 256) @@ -2006,7 +2006,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmultishift_qb_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("conflict.")) { + } else if (Name.starts_with("conflict.")) { if (Name[9] == 'd' && VecWidth == 128) IID = Intrinsic::x86_avx512_conflict_d_128; else if (Name[9] == 'd' && VecWidth == 256) @@ -2021,7 +2021,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = 
Intrinsic::x86_avx512_conflict_q_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pavg.")) { + } else if (Name.starts_with("pavg.")) { if (Name[5] == 'b' && VecWidth == 128) IID = Intrinsic::x86_sse2_pavg_b; else if (Name[5] == 'b' && VecWidth == 256) @@ -2157,8 +2157,8 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder) { - const bool IsInc = Name.startswith("atomic.inc."); - if (IsInc || Name.startswith("atomic.dec.")) { + const bool IsInc = Name.starts_with("atomic.inc."); + if (IsInc || Name.starts_with("atomic.dec.")) { if (CI->getNumOperands() != 6) // Malformed bitcode. return nullptr; @@ -2209,23 +2209,23 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { // Get the Function's name. StringRef Name = F->getName(); - assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); + assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'"); Name = Name.substr(5); - bool IsX86 = Name.startswith("x86."); + bool IsX86 = Name.starts_with("x86."); if (IsX86) Name = Name.substr(4); - bool IsNVVM = Name.startswith("nvvm."); + bool IsNVVM = Name.starts_with("nvvm."); if (IsNVVM) Name = Name.substr(5); - bool IsARM = Name.startswith("arm."); + bool IsARM = Name.starts_with("arm."); if (IsARM) Name = Name.substr(4); - bool IsAMDGCN = Name.startswith("amdgcn."); + bool IsAMDGCN = Name.starts_with("amdgcn."); if (IsAMDGCN) Name = Name.substr(7); - if (IsX86 && Name.startswith("sse4a.movnt.")) { + if (IsX86 && Name.starts_with("sse4a.movnt.")) { SmallVector Elts; Elts.push_back( ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); @@ -2250,8 +2250,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { return; } - if (IsX86 && (Name.startswith("avx.movnt.") || - Name.startswith("avx512.storent."))) { + if (IsX86 && (Name.starts_with("avx.movnt.") || + Name.starts_with("avx512.storent."))) { SmallVector Elts; Elts.push_back( ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); @@ -2291,9 +2291,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { return; } - if (IsX86 && (Name.startswith("sse.storeu.") || - Name.startswith("sse2.storeu.") || - Name.startswith("avx.storeu."))) { + if (IsX86 && (Name.starts_with("sse.storeu.") || + Name.starts_with("sse2.storeu.") || + Name.starts_with("avx.storeu."))) { Value *Arg0 = CI->getArgOperand(0); Value *Arg1 = CI->getArgOperand(1); @@ -2317,7 +2317,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { return; } - if (IsX86 && (Name.startswith("avx512.mask.store"))) { + if (IsX86 && (Name.starts_with("avx512.mask.store"))) { // "avx512.mask.storeu." or "avx512.mask.store." bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), @@ -2330,14 +2330,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *Rep; // Upgrade packed integer vector compare intrinsics to compare instructions. - if (IsX86 && (Name.startswith("sse2.pcmp") || - Name.startswith("avx2.pcmp"))) { + if (IsX86 && (Name.starts_with("sse2.pcmp") || + Name.starts_with("avx2.pcmp"))) { // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." bool CmpEq = Name[9] == 'e'; Rep = Builder.CreateICmp(CmpEq ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, CI->getArgOperand(0), CI->getArgOperand(1)); Rep = Builder.CreateSExt(Rep, CI->getType(), ""); - } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { + } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) { Type *ExtTy = Type::getInt32Ty(C); if (CI->getOperand(0)->getType()->isIntegerTy(8)) ExtTy = Type::getInt64Ty(C); @@ -2353,14 +2353,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Intrinsic::sqrt, Elt0->getType()); Elt0 = Builder.CreateCall(Intr, Elt0); Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0); - } else if (IsX86 && (Name.startswith("avx.sqrt.p") || - Name.startswith("sse2.sqrt.p") || - Name.startswith("sse.sqrt.p"))) { + } else if (IsX86 && (Name.starts_with("avx.sqrt.p") || + Name.starts_with("sse2.sqrt.p") || + Name.starts_with("sse.sqrt.p"))) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt, CI->getType()), {CI->getArgOperand(0)}); - } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) { if (CI->arg_size() == 4 && (!isa(CI->getArgOperand(3)) || cast(CI->getArgOperand(3))->getZExtValue() != 4)) { @@ -2378,8 +2378,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx512.ptestm") || - Name.startswith("avx512.ptestnm"))) { + } else if (IsX86 && (Name.starts_with("avx512.ptestm") || + Name.starts_with("avx512.ptestnm"))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -2387,16 +2387,16 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { llvm::Type *Ty = Op0->getType(); Value *Zero = llvm::Constant::getNullValue(Ty); ICmpInst::Predicate Pred = - Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; Rep = Builder.CreateICmp(Pred, Rep, Zero); Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask); - } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ + } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){ unsigned NumElts = cast(CI->getArgOperand(1)->getType()) ->getNumElements(); Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { + } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) { unsigned NumElts = CI->getType()->getScalarSizeInBits(); Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); @@ -2478,11 +2478,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { EltOp = Builder.CreateFDiv(Elt0, Elt1); Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp, ConstantInt::get(I32Ty, 0)); - } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { + } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) { // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." bool CmpEq = Name[16] == 'e'; Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 
0 : 6, true); - } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) { Type *OpTy = CI->getArgOperand(0)->getType(); unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); Intrinsic::ID IID; @@ -2496,7 +2496,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), { CI->getOperand(0), CI->getArgOperand(1) }); Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) { + } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) { Type *OpTy = CI->getArgOperand(0)->getType(); unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); unsigned EltWidth = OpTy->getScalarSizeInBits(); @@ -2519,7 +2519,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), { CI->getOperand(0), CI->getArgOperand(1) }); Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.cmp.p")) { + } else if (IsX86 && Name.starts_with("avx512.cmp.p")) { SmallVector Args(CI->args()); Type *OpTy = Args[0]->getType(); unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); @@ -2547,17 +2547,17 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Args); - } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) { // Integer compare intrinsics. unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); - } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) { unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); - } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || - Name.startswith("avx512.cvtw2mask.") || - Name.startswith("avx512.cvtd2mask.") || - Name.startswith("avx512.cvtq2mask."))) { + } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") || + Name.starts_with("avx512.cvtw2mask.") || + Name.starts_with("avx512.cvtd2mask.") || + Name.starts_with("avx512.cvtq2mask."))) { Value *Op = CI->getArgOperand(0); Value *Zero = llvm::Constant::getNullValue(Op->getType()); Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); @@ -2565,42 +2565,42 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } else if(IsX86 && (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || Name == "ssse3.pabs.d.128" || - Name.startswith("avx2.pabs") || - Name.startswith("avx512.mask.pabs"))) { + Name.starts_with("avx2.pabs") || + Name.starts_with("avx512.mask.pabs"))) { Rep = upgradeAbs(Builder, *CI); } else if (IsX86 && (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" || Name == "sse41.pmaxsd" || - Name.startswith("avx2.pmaxs") || - Name.startswith("avx512.mask.pmaxs"))) { + Name.starts_with("avx2.pmaxs") || + Name.starts_with("avx512.mask.pmaxs"))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax); } else if (IsX86 && (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || Name == "sse41.pmaxud" || - Name.startswith("avx2.pmaxu") || - Name.startswith("avx512.mask.pmaxu"))) { + Name.starts_with("avx2.pmaxu") || + Name.starts_with("avx512.mask.pmaxu"))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, 
Intrinsic::umax); } else if (IsX86 && (Name == "sse41.pminsb" || Name == "sse2.pmins.w" || Name == "sse41.pminsd" || - Name.startswith("avx2.pmins") || - Name.startswith("avx512.mask.pmins"))) { + Name.starts_with("avx2.pmins") || + Name.starts_with("avx512.mask.pmins"))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin); } else if (IsX86 && (Name == "sse2.pminu.b" || Name == "sse41.pminuw" || Name == "sse41.pminud" || - Name.startswith("avx2.pminu") || - Name.startswith("avx512.mask.pminu"))) { + Name.starts_with("avx2.pminu") || + Name.starts_with("avx512.mask.pminu"))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin); } else if (IsX86 && (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" || Name == "avx512.pmulu.dq.512" || - Name.startswith("avx512.mask.pmulu.dq."))) { + Name.starts_with("avx512.mask.pmulu.dq."))) { Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); } else if (IsX86 && (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" || Name == "avx512.pmul.dq.512" || - Name.startswith("avx512.mask.pmul.dq."))) { + Name.starts_with("avx512.mask.pmul.dq."))) { Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); } else if (IsX86 && (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" || @@ -2624,12 +2624,12 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Name == "sse2.cvtdq2ps" || Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" || - Name.startswith("avx512.mask.cvtdq2pd.") || - Name.startswith("avx512.mask.cvtudq2pd.") || - Name.startswith("avx512.mask.cvtdq2ps.") || - Name.startswith("avx512.mask.cvtudq2ps.") || - Name.startswith("avx512.mask.cvtqq2pd.") || - Name.startswith("avx512.mask.cvtuqq2pd.") || + Name.starts_with("avx512.mask.cvtdq2pd.") || + Name.starts_with("avx512.mask.cvtudq2pd.") || + Name.starts_with("avx512.mask.cvtdq2ps.") || + Name.starts_with("avx512.mask.cvtudq2ps.") || + Name.starts_with("avx512.mask.cvtqq2pd.") || + Name.starts_with("avx512.mask.cvtuqq2pd.") || Name == "avx512.mask.cvtqq2ps.256" || Name == "avx512.mask.cvtqq2ps.512" || Name == "avx512.mask.cvtuqq2ps.256" || @@ -2668,8 +2668,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() >= 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") || - Name.startswith("vcvtph2ps."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") || + Name.starts_with("vcvtph2ps."))) { auto *DstTy = cast(CI->getType()); Rep = CI->getArgOperand(0); auto *SrcTy = cast(Rep->getType()); @@ -2684,13 +2684,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() >= 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && Name.startswith("avx512.mask.load")) { + } else if (IsX86 && Name.starts_with("avx512.mask.load")) { // "avx512.mask.loadu." or "avx512.mask.load." bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu". 
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), Aligned); - } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) { auto *ResultTy = cast(CI->getType()); Type *PtrTy = ResultTy->getElementType(); @@ -2705,7 +2705,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Intrinsic::masked_expandload, ResultTy); Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) }); - } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) { auto *ResultTy = cast(CI->getArgOperand(1)->getType()); Type *PtrTy = ResultTy->getElementType(); @@ -2721,8 +2721,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Intrinsic::masked_compressstore, ResultTy); Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); - } else if (IsX86 && (Name.startswith("avx512.mask.compress.") || - Name.startswith("avx512.mask.expand."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") || + Name.starts_with("avx512.mask.expand."))) { auto *ResultTy = cast(CI->getType()); Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), @@ -2734,13 +2734,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy); Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1), MaskVec }); - } else if (IsX86 && Name.startswith("xop.vpcom")) { + } else if (IsX86 && Name.starts_with("xop.vpcom")) { bool IsSigned; - if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") || - Name.endswith("uq")) + if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") || + Name.ends_with("uq")) IsSigned = false; - else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") || - Name.endswith("q")) + else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") || + Name.ends_with("q")) IsSigned = true; else llvm_unreachable("Unknown suffix"); @@ -2750,48 +2750,48 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Imm = cast(CI->getArgOperand(2))->getZExtValue(); } else { Name = Name.substr(9); // strip off "xop.vpcom" - if (Name.startswith("lt")) + if (Name.starts_with("lt")) Imm = 0; - else if (Name.startswith("le")) + else if (Name.starts_with("le")) Imm = 1; - else if (Name.startswith("gt")) + else if (Name.starts_with("gt")) Imm = 2; - else if (Name.startswith("ge")) + else if (Name.starts_with("ge")) Imm = 3; - else if (Name.startswith("eq")) + else if (Name.starts_with("eq")) Imm = 4; - else if (Name.startswith("ne")) + else if (Name.starts_with("ne")) Imm = 5; - else if (Name.startswith("false")) + else if (Name.starts_with("false")) Imm = 6; - else if (Name.startswith("true")) + else if (Name.starts_with("true")) Imm = 7; else llvm_unreachable("Unknown condition"); } Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); - } else if (IsX86 && Name.startswith("xop.vpcmov")) { + } else if (IsX86 && Name.starts_with("xop.vpcmov")) { Value *Sel = CI->getArgOperand(2); Value *NotSel = Builder.CreateNot(Sel); Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); Rep = Builder.CreateOr(Sel0, Sel1); - } else if (IsX86 && (Name.startswith("xop.vprot") || - Name.startswith("avx512.prol") || - Name.startswith("avx512.mask.prol"))) { + } else if 
(IsX86 && (Name.starts_with("xop.vprot") || + Name.starts_with("avx512.prol") || + Name.starts_with("avx512.mask.prol"))) { Rep = upgradeX86Rotate(Builder, *CI, false); - } else if (IsX86 && (Name.startswith("avx512.pror") || - Name.startswith("avx512.mask.pror"))) { + } else if (IsX86 && (Name.starts_with("avx512.pror") || + Name.starts_with("avx512.mask.pror"))) { Rep = upgradeX86Rotate(Builder, *CI, true); - } else if (IsX86 && (Name.startswith("avx512.vpshld.") || - Name.startswith("avx512.mask.vpshld") || - Name.startswith("avx512.maskz.vpshld"))) { + } else if (IsX86 && (Name.starts_with("avx512.vpshld.") || + Name.starts_with("avx512.mask.vpshld") || + Name.starts_with("avx512.maskz.vpshld"))) { bool ZeroMask = Name[11] == 'z'; Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask); - } else if (IsX86 && (Name.startswith("avx512.vpshrd.") || - Name.startswith("avx512.mask.vpshrd") || - Name.startswith("avx512.maskz.vpshrd"))) { + } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") || + Name.starts_with("avx512.mask.vpshrd") || + Name.starts_with("avx512.maskz.vpshrd"))) { bool ZeroMask = Name[11] == 'z'; Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask); } else if (IsX86 && Name == "sse42.crc32.64.8") { @@ -2800,8 +2800,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); Rep = Builder.CreateZExt(Rep, CI->getType(), ""); - } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") || - Name.startswith("avx512.vbroadcast.s"))) { + } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") || + Name.starts_with("avx512.vbroadcast.s"))) { // Replace broadcasts with a series of insertelements. auto *VecTy = cast(CI->getType()); Type *EltTy = VecTy->getElementType(); @@ -2812,12 +2812,12 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { for (unsigned I = 0; I < EltNum; ++I) Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I)); - } else if (IsX86 && (Name.startswith("sse41.pmovsx") || - Name.startswith("sse41.pmovzx") || - Name.startswith("avx2.pmovsx") || - Name.startswith("avx2.pmovzx") || - Name.startswith("avx512.mask.pmovsx") || - Name.startswith("avx512.mask.pmovzx"))) { + } else if (IsX86 && (Name.starts_with("sse41.pmovsx") || + Name.starts_with("sse41.pmovzx") || + Name.starts_with("avx2.pmovsx") || + Name.starts_with("avx2.pmovzx") || + Name.starts_with("avx512.mask.pmovsx") || + Name.starts_with("avx512.mask.pmovzx"))) { auto *DstTy = cast(CI->getType()); unsigned NumDstElts = DstTy->getNumElements(); @@ -2844,7 +2844,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || + } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") || Name == "avx2.vbroadcasti128")) { // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 
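// E.g. for a <4 x float> source the 128-bit value is loaded once and widened
// to 256 bits with the <0,1,2,3,0,1,2,3> shuffle mask built below.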
Type *EltTy = cast(CI->getType())->getElementType(); @@ -2858,8 +2858,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { else Rep = Builder.CreateShuffleVector( Load, ArrayRef{0, 1, 2, 3, 0, 1, 2, 3}); - } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") || - Name.startswith("avx512.mask.shuf.f"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") || + Name.starts_with("avx512.mask.shuf.f"))) { unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); Type *VT = CI->getType(); unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; @@ -2880,8 +2880,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->getArgOperand(1), ShuffleMask); Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3)); - }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || - Name.startswith("avx512.mask.broadcasti"))) { + }else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") || + Name.starts_with("avx512.mask.broadcasti"))) { unsigned NumSrcElts = cast(CI->getArgOperand(0)->getType()) ->getNumElements(); @@ -2897,10 +2897,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { ShuffleMask); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx2.pbroadcast") || - Name.startswith("avx2.vbroadcast") || - Name.startswith("avx512.pbroadcast") || - Name.startswith("avx512.mask.broadcast.s"))) { + } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") || + Name.starts_with("avx2.vbroadcast") || + Name.starts_with("avx512.pbroadcast") || + Name.starts_with("avx512.mask.broadcast.s"))) { // Replace vp?broadcasts with a vector shuffle. Value *Op = CI->getArgOperand(0); ElementCount EC = cast(CI->getType())->getElementCount(); @@ -2912,32 +2912,32 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() == 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("sse2.padds.") || - Name.startswith("avx2.padds.") || - Name.startswith("avx512.padds.") || - Name.startswith("avx512.mask.padds."))) { + } else if (IsX86 && (Name.starts_with("sse2.padds.") || + Name.starts_with("avx2.padds.") || + Name.starts_with("avx512.padds.") || + Name.starts_with("avx512.mask.padds."))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat); - } else if (IsX86 && (Name.startswith("sse2.psubs.") || - Name.startswith("avx2.psubs.") || - Name.startswith("avx512.psubs.") || - Name.startswith("avx512.mask.psubs."))) { + } else if (IsX86 && (Name.starts_with("sse2.psubs.") || + Name.starts_with("avx2.psubs.") || + Name.starts_with("avx512.psubs.") || + Name.starts_with("avx512.mask.psubs."))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat); - } else if (IsX86 && (Name.startswith("sse2.paddus.") || - Name.startswith("avx2.paddus.") || - Name.startswith("avx512.mask.paddus."))) { + } else if (IsX86 && (Name.starts_with("sse2.paddus.") || + Name.starts_with("avx2.paddus.") || + Name.starts_with("avx512.mask.paddus."))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat); - } else if (IsX86 && (Name.startswith("sse2.psubus.") || - Name.startswith("avx2.psubus.") || - Name.startswith("avx512.mask.psubus."))) { + } else if (IsX86 && (Name.starts_with("sse2.psubus.") || + Name.starts_with("avx2.psubus.") || + Name.starts_with("avx512.mask.psubus."))) { Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat); - } 
else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) { Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), false); - } else if (IsX86 && Name.startswith("avx512.mask.valign.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) { Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), @@ -2969,10 +2969,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); } else if (IsX86 && (Name == "sse41.pblendw" || - Name.startswith("sse41.blendp") || - Name.startswith("avx.blend.p") || + Name.starts_with("sse41.blendp") || + Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" || - Name.startswith("avx2.pblendd."))) { + Name.starts_with("avx2.pblendd."))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast (CI->getArgOperand(2))->getZExtValue(); @@ -2984,9 +2984,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); - } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || + } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") || Name == "avx2.vinserti128" || - Name.startswith("avx512.mask.insert"))) { + Name.starts_with("avx512.mask.insert"))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); @@ -3030,9 +3030,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() == 5) Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3)); - } else if (IsX86 && (Name.startswith("avx.vextractf128.") || + } else if (IsX86 && (Name.starts_with("avx.vextractf128.") || Name == "avx2.vextracti128" || - Name.startswith("avx512.mask.vextract"))) { + Name.starts_with("avx512.mask.vextract"))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); unsigned DstNumElts = @@ -3057,8 +3057,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->getArgOperand(2)); } else if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; - } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || - Name.startswith("avx512.mask.perm.di."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") || + Name.starts_with("avx512.mask.perm.di."))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); auto *VecTy = cast(CI->getType()); @@ -3073,7 +3073,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() == 4) Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx.vperm2f128.") || + } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128")) { // The immediate permute control byte looks like this: // [1:0] - select 128 bits from sources for low half of destination @@ -3109,10 +3109,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); - } else if (IsX86 && (Name.startswith("avx.vpermil.") || + } else if (IsX86 && (Name.starts_with("avx.vpermil.") 
|| Name == "sse2.pshuf.d" || - Name.startswith("avx512.mask.vpermil.p") || - Name.startswith("avx512.mask.pshuf.d."))) { + Name.starts_with("avx512.mask.vpermil.p") || + Name.starts_with("avx512.mask.pshuf.d."))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); auto *VecTy = cast(CI->getType()); @@ -3134,7 +3134,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); } else if (IsX86 && (Name == "sse2.pshufl.w" || - Name.startswith("avx512.mask.pshufl.w."))) { + Name.starts_with("avx512.mask.pshufl.w."))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); unsigned NumElts = cast(CI->getType())->getNumElements(); @@ -3153,7 +3153,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); } else if (IsX86 && (Name == "sse2.pshufh.w" || - Name.startswith("avx512.mask.pshufh.w."))) { + Name.starts_with("avx512.mask.pshufh.w."))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); unsigned NumElts = cast(CI->getType())->getNumElements(); @@ -3171,7 +3171,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (CI->arg_size() == 4) Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) { + } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); @@ -3196,15 +3196,15 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3)); - } else if (IsX86 && (Name.startswith("avx512.mask.movddup") || - Name.startswith("avx512.mask.movshdup") || - Name.startswith("avx512.mask.movsldup"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") || + Name.starts_with("avx512.mask.movshdup") || + Name.starts_with("avx512.mask.movsldup"))) { Value *Op0 = CI->getArgOperand(0); unsigned NumElts = cast(CI->getType())->getNumElements(); unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); unsigned Offset = 0; - if (Name.startswith("avx512.mask.movshdup.")) + if (Name.starts_with("avx512.mask.movshdup.")) Offset = 1; SmallVector Idxs(NumElts); @@ -3218,8 +3218,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") || - Name.startswith("avx512.mask.unpckl."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") || + Name.starts_with("avx512.mask.unpckl."))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); int NumElts = cast(CI->getType())->getNumElements(); @@ -3234,8 +3234,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") || - Name.startswith("avx512.mask.unpckh."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") || + Name.starts_with("avx512.mask.unpckh."))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); int NumElts = cast(CI->getType())->getNumElements(); @@ -3250,8 +3250,8 @@ 
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.and.") || - Name.startswith("avx512.mask.pand."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.and.") || + Name.starts_with("avx512.mask.pand."))) { VectorType *FTy = cast(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -3259,8 +3259,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.andn.") || - Name.startswith("avx512.mask.pandn."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") || + Name.starts_with("avx512.mask.pandn."))) { VectorType *FTy = cast(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); @@ -3269,8 +3269,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.or.") || - Name.startswith("avx512.mask.por."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.or.") || + Name.starts_with("avx512.mask.por."))) { VectorType *FTy = cast(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -3278,8 +3278,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.xor.") || - Name.startswith("avx512.mask.pxor."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") || + Name.starts_with("avx512.mask.pxor."))) { VectorType *FTy = cast(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -3287,20 +3287,20 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.padd.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) { Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.psub.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) { Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) { Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.add.p")) { - if (Name.endswith(".512")) { + } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) { + if (Name.ends_with(".512")) { Intrinsic::ID IID; if (Name[17] == 's') IID = Intrinsic::x86_avx512_add_ps_512; @@ -3315,8 
+3315,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { - if (Name.endswith(".512")) { + } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) { + if (Name.ends_with(".512")) { Intrinsic::ID IID; if (Name[17] == 's') IID = Intrinsic::x86_avx512_div_ps_512; @@ -3331,8 +3331,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { - if (Name.endswith(".512")) { + } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) { + if (Name.ends_with(".512")) { Intrinsic::ID IID; if (Name[17] == 's') IID = Intrinsic::x86_avx512_mul_ps_512; @@ -3347,8 +3347,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { - if (Name.endswith(".512")) { + } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) { + if (Name.ends_with(".512")) { Intrinsic::ID IID; if (Name[17] == 's') IID = Intrinsic::x86_avx512_sub_ps_512; @@ -3363,8 +3363,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || - Name.startswith("avx512.mask.min.p")) && + } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") || + Name.starts_with("avx512.mask.min.p")) && Name.drop_front(18) == ".512") { bool IsDouble = Name[17] == 'd'; bool IsMin = Name[13] == 'i'; @@ -3379,14 +3379,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->getArgOperand(4) }); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { + } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, CI->getType()), { CI->getArgOperand(0), Builder.getInt1(false) }); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); - } else if (IsX86 && Name.startswith("avx512.mask.psll")) { + } else if (IsX86 && Name.starts_with("avx512.mask.psll")) { bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); bool IsVariable = Name[16] == 'v'; @@ -3413,7 +3413,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { IID = Intrinsic::x86_avx512_psllv_w_512; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".128")) { + } else if (Name.ends_with(".128")) { if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d : Intrinsic::x86_sse2_psll_d; @@ -3425,7 +3425,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { : Intrinsic::x86_sse2_psll_w; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".256")) { + } else if (Name.ends_with(".256")) { if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 IID = IsImmediate ? 
Intrinsic::x86_avx2_pslli_d : Intrinsic::x86_avx2_psll_d; @@ -3454,7 +3454,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = UpgradeX86MaskedShift(Builder, *CI, IID); - } else if (IsX86 && Name.startswith("avx512.mask.psrl")) { + } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) { bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); bool IsVariable = Name[16] == 'v'; @@ -3481,7 +3481,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { IID = Intrinsic::x86_avx512_psrlv_w_512; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".128")) { + } else if (Name.ends_with(".128")) { if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d : Intrinsic::x86_sse2_psrl_d; @@ -3493,7 +3493,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { : Intrinsic::x86_sse2_psrl_w; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".256")) { + } else if (Name.ends_with(".256")) { if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d : Intrinsic::x86_avx2_psrl_d; @@ -3522,7 +3522,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = UpgradeX86MaskedShift(Builder, *CI, IID); - } else if (IsX86 && Name.startswith("avx512.mask.psra")) { + } else if (IsX86 && Name.starts_with("avx512.mask.psra")) { bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); bool IsVariable = Name[16] == 'v'; @@ -3545,7 +3545,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { IID = Intrinsic::x86_avx512_psrav_w_512; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".128")) { + } else if (Name.ends_with(".128")) { if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d : Intrinsic::x86_sse2_psra_d; @@ -3558,7 +3558,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { : Intrinsic::x86_sse2_psra_w; else llvm_unreachable("Unexpected size"); - } else if (Name.endswith(".256")) { + } else if (Name.ends_with(".256")) { if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 IID = IsImmediate ? 
Intrinsic::x86_avx2_psrai_d : Intrinsic::x86_avx2_psra_d; @@ -3588,11 +3588,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } Rep = UpgradeX86MaskedShift(Builder, *CI, IID); - } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { + } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) { Rep = upgradeMaskedMove(Builder, *CI); - } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { + } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) { Rep = UpgradeMaskToInt(Builder, *CI); - } else if (IsX86 && Name.endswith(".movntdqa")) { + } else if (IsX86 && Name.ends_with(".movntdqa")) { MDNode *Node = MDNode::get( C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); @@ -3606,10 +3606,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); LI->setMetadata(LLVMContext::MD_nontemporal, Node); Rep = LI; - } else if (IsX86 && (Name.startswith("fma.vfmadd.") || - Name.startswith("fma.vfmsub.") || - Name.startswith("fma.vfnmadd.") || - Name.startswith("fma.vfnmsub."))) { + } else if (IsX86 && (Name.starts_with("fma.vfmadd.") || + Name.starts_with("fma.vfmsub.") || + Name.starts_with("fma.vfnmadd.") || + Name.starts_with("fma.vfnmsub."))) { bool NegMul = Name[6] == 'n'; bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; @@ -3638,7 +3638,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (IsScalar) Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); - } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) { + } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) { Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2) }; @@ -3653,11 +3653,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), Rep, (uint64_t)0); - } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") || - Name.startswith("avx512.maskz.vfmadd.s") || - Name.startswith("avx512.mask3.vfmadd.s") || - Name.startswith("avx512.mask3.vfmsub.s") || - Name.startswith("avx512.mask3.vfnmsub.s"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") || + Name.starts_with("avx512.maskz.vfmadd.s") || + Name.starts_with("avx512.mask3.vfmadd.s") || + Name.starts_with("avx512.mask3.vfmsub.s") || + Name.starts_with("avx512.mask3.vfnmsub.s"))) { bool IsMask3 = Name[11] == '3'; bool IsMaskZ = Name[11] == 'z'; // Drop the "avx512.mask." to make it easier. @@ -3711,13 +3711,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep, PassThru); Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 
2 : 0), Rep, (uint64_t)0); - } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") || - Name.startswith("avx512.mask.vfnmadd.p") || - Name.startswith("avx512.mask.vfnmsub.p") || - Name.startswith("avx512.mask3.vfmadd.p") || - Name.startswith("avx512.mask3.vfmsub.p") || - Name.startswith("avx512.mask3.vfnmsub.p") || - Name.startswith("avx512.maskz.vfmadd.p"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") || + Name.starts_with("avx512.mask.vfnmadd.p") || + Name.starts_with("avx512.mask.vfnmsub.p") || + Name.starts_with("avx512.mask3.vfmadd.p") || + Name.starts_with("avx512.mask3.vfmsub.p") || + Name.starts_with("avx512.mask3.vfnmsub.p") || + Name.starts_with("avx512.maskz.vfmadd.p"))) { bool IsMask3 = Name[11] == '3'; bool IsMaskZ = Name[11] == 'z'; // Drop the "avx512.mask." to make it easier. @@ -3760,7 +3760,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); - } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) { + } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) { unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); unsigned EltWidth = CI->getType()->getScalarSizeInBits(); Intrinsic::ID IID; @@ -3780,10 +3780,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Ops[2] = Builder.CreateFNeg(Ops[2]); Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops); - } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") || - Name.startswith("avx512.mask3.vfmaddsub.p") || - Name.startswith("avx512.maskz.vfmaddsub.p") || - Name.startswith("avx512.mask3.vfmsubadd.p"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") || + Name.starts_with("avx512.mask3.vfmaddsub.p") || + Name.starts_with("avx512.maskz.vfmaddsub.p") || + Name.starts_with("avx512.mask3.vfmsubadd.p"))) { bool IsMask3 = Name[11] == '3'; bool IsMaskZ = Name[11] == 'z'; // Drop the "avx512.mask." to make it easier. @@ -3831,8 +3831,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); - } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") || - Name.startswith("avx512.maskz.pternlog."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") || + Name.starts_with("avx512.maskz.pternlog."))) { bool ZeroMask = Name[11] == 'z'; unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); unsigned EltWidth = CI->getType()->getScalarSizeInBits(); @@ -3859,8 +3859,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); - } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") || - Name.startswith("avx512.maskz.vpmadd52"))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") || + Name.starts_with("avx512.maskz.vpmadd52"))) { bool ZeroMask = Name[11] == 'z'; bool High = Name[20] == 'h' || Name[21] == 'h'; unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); @@ -3887,16 +3887,16 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); - } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") || - Name.startswith("avx512.mask.vpermt2var.") || - Name.startswith("avx512.maskz.vpermt2var."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") || + Name.starts_with("avx512.mask.vpermt2var.") || + Name.starts_with("avx512.maskz.vpermt2var."))) { bool ZeroMask = Name[11] == 'z'; bool IndexForm = Name[17] == 'i'; Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); - } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") || - Name.startswith("avx512.maskz.vpdpbusd.") || - Name.startswith("avx512.mask.vpdpbusds.") || - Name.startswith("avx512.maskz.vpdpbusds."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") || + Name.starts_with("avx512.maskz.vpdpbusd.") || + Name.starts_with("avx512.mask.vpdpbusds.") || + Name.starts_with("avx512.maskz.vpdpbusds."))) { bool ZeroMask = Name[11] == 'z'; bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); @@ -3923,10 +3923,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); - } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") || - Name.startswith("avx512.maskz.vpdpwssd.") || - Name.startswith("avx512.mask.vpdpwssds.") || - Name.startswith("avx512.maskz.vpdpwssds."))) { + } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") || + Name.starts_with("avx512.maskz.vpdpwssd.") || + Name.starts_with("avx512.mask.vpdpwssds.") || + Name.starts_with("avx512.maskz.vpdpwssds."))) { bool ZeroMask = Name[11] == 'z'; bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); @@ -3986,7 +3986,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { CI->replaceAllUsesWith(CF); Rep = nullptr; - } else if (IsX86 && Name.startswith("avx512.mask.") && + } else if (IsX86 && Name.starts_with("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { @@ -3995,8 +3995,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Value *Cmp = Builder.CreateICmpSGE( Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); - } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") || - Name.startswith("atomic.load.add.f64.p"))) { + } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") || + Name.starts_with("atomic.load.add.f64.p"))) { Value *Ptr = CI->getArgOperand(0); Value *Val = CI->getArgOperand(1); Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), @@ -4005,7 +4005,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Name == "max.ui" || Name == "max.ull")) { Value *Arg0 = CI->getArgOperand(0); Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") + Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull") ? 
Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); @@ -4013,7 +4013,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Name == "min.ui" || Name == "min.ull")) { Value *Arg0 = CI->getArgOperand(0); Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") + Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull") ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); @@ -4174,7 +4174,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { case Intrinsic::vector_extract: { StringRef Name = F->getName(); Name = Name.substr(5); // Strip llvm - if (!Name.startswith("aarch64.sve.tuple.get")) { + if (!Name.starts_with("aarch64.sve.tuple.get")) { DefaultCase(); return; } @@ -4190,11 +4190,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { case Intrinsic::vector_insert: { StringRef Name = F->getName(); Name = Name.substr(5); - if (!Name.startswith("aarch64.sve.tuple")) { + if (!Name.starts_with("aarch64.sve.tuple")) { DefaultCase(); return; } - if (Name.startswith("aarch64.sve.tuple.set")) { + if (Name.starts_with("aarch64.sve.tuple.set")) { unsigned I = dyn_cast(CI->getArgOperand(1))->getZExtValue(); ScalableVectorType *Ty = dyn_cast(CI->getArgOperand(2)->getType()); @@ -4204,7 +4204,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx}); break; } - if (Name.startswith("aarch64.sve.tuple.create")) { + if (Name.starts_with("aarch64.sve.tuple.create")) { unsigned N = StringSwitch(Name) .StartsWith("aarch64.sve.tuple.create2", 2) .StartsWith("aarch64.sve.tuple.create3", 3) @@ -4283,7 +4283,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { StringRef Name = F->getName(); Name = Name.substr(5); // Strip llvm. // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`. - if (Name.startswith("dbg.addr")) { + if (Name.starts_with("dbg.addr")) { DIExpression *Expr = cast( cast(CI->getArgOperand(2))->getMetadata()); Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); @@ -4854,7 +4854,7 @@ bool llvm::UpgradeModuleFlags(Module &M) { // Upgrade branch protection and return address signing module flags. The // module flag behavior for these fields were Error and now they are Min. 
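// (With Min behavior, linking modules that disagree takes the smaller of the
// two values, so a module lacking the feature no longer hard-errors.)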
if (ID->getString() == "branch-target-enforcement" || - ID->getString().startswith("sign-return-address")) { + ID->getString().starts_with("sign-return-address")) { if (auto *Behavior = mdconst::dyn_extract_or_null(Op->getOperand(0))) { if (Behavior->getLimitedValue() == Module::Error) { @@ -4957,7 +4957,7 @@ void llvm::UpgradeSectionAttributes(Module &M) { StringRef Section = GV.getSection(); - if (!Section.startswith("__DATA, __objc_catlist")) + if (!Section.starts_with("__DATA, __objc_catlist")) continue; // __DATA, __objc_catlist, regular, no_dead_strip @@ -5017,12 +5017,12 @@ static bool isOldLoopArgument(Metadata *MD) { auto *S = dyn_cast_or_null(T->getOperand(0)); if (!S) return false; - return S->getString().startswith("llvm.vectorizer."); + return S->getString().starts_with("llvm.vectorizer."); } static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { StringRef OldPrefix = "llvm.vectorizer."; - assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); + assert(OldTag.starts_with(OldPrefix) && "Expected old prefix"); if (OldTag == "llvm.vectorizer.unroll") return MDString::get(C, "llvm.loop.interleave.count"); @@ -5041,7 +5041,7 @@ static Metadata *upgradeLoopArgument(Metadata *MD) { auto *OldTag = dyn_cast_or_null(T->getOperand(0)); if (!OldTag) return MD; - if (!OldTag->getString().startswith("llvm.vectorizer.")) + if (!OldTag->getString().starts_with("llvm.vectorizer.")) return MD; // This has an old tag. Upgrade it. @@ -5075,7 +5075,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { // The only data layout upgrades needed for pre-GCN are setting the address // space of globals to 1. if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") && - !DL.startswith("G")) { + !DL.starts_with("G")) { return DL.empty() ? std::string("G1") : (DL + "-G1").str(); } @@ -5097,7 +5097,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { // Add missing non-integral declarations. // This goes before adding new address spaces to prevent incoherent string // values. - if (!DL.contains("-ni") && !DL.startswith("ni")) + if (!DL.contains("-ni") && !DL.starts_with("ni")) Res.append("-ni:7:8"); // Update ni:7 to ni:7:8. if (DL.ends_with("ni:7")) @@ -5105,9 +5105,9 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer // resources) An empty data layout has already been upgraded to G1 by now. 
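// Here "p7:160:256:256:32" declares a 160-bit pointer with 256-bit ABI and
// preferred alignment and a 32-bit index width; "p8:128:128" declares a
// 128-bit pointer with 128-bit ABI alignment.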
- if (!DL.contains("-p7") && !DL.startswith("p7")) + if (!DL.contains("-p7") && !DL.starts_with("p7")) Res.append("-p7:160:256:256:32"); - if (!DL.contains("-p8") && !DL.startswith("p8")) + if (!DL.contains("-p8") && !DL.starts_with("p8")) Res.append("-p8:128:128"); return Res; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cca481181068d7..16072d2efc95bf 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2270,14 +2270,14 @@ bool ConstantExpr::isDesirableCastOp(unsigned Opcode) { switch (Opcode) { case Instruction::ZExt: case Instruction::SExt: - return false; - case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: + return false; + case Instruction::Trunc: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index a751c4f85714bc..c9d6ca3373cf40 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1328,6 +1328,10 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { CheckDI(N.getTag() == dwarf::DW_TAG_array_type, "rank can only appear in array type"); } + + if (N.getTag() == dwarf::DW_TAG_array_type) { + CheckDI(N.getRawBaseType(), "array types must have a base type", &N); + } } void Verifier::visitDISubroutineType(const DISubroutineType &N) { diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index e4d18d8a7dd5b5..8490fefe7ff535 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -843,7 +843,7 @@ bool ELFWriter::maybeWriteCompression( uint32_t ChType, uint64_t Size, SmallVectorImpl &CompressedContents, Align Alignment) { uint64_t HdrSize = - is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); + is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr); if (Size <= HdrSize + CompressedContents.size()) return false; // Platform specific header is followed by compressed data. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 804ffb90b53024..cd810f0b43e50d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -19,6 +19,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/R600MCTargetDesc.h" #include "R600RegisterInfo.h" +#include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -2263,6 +2264,34 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { return false; } +static SDValue combineBallotPattern(SDValue VCMP, bool &Negate) { + assert(VCMP->getOpcode() == AMDGPUISD::SETCC); + // Special case for amdgcn.ballot: + // %Cond = i1 (and/or combination of i1 ISD::SETCCs) + // %VCMP = i(WaveSize) AMDGPUISD::SETCC (ext %Cond), 0, setne/seteq + // => + // Use i1 %Cond value instead of i(WaveSize) %VCMP. + // This is possible because divergent ISD::SETCC is selected as V_CMP and + // Cond becomes an i(WaveSize) full mask value. + // Note that ballot doesn't use SETEQ condition but it's easy to support it + // here for completeness, so in this case Negate is set true on return.
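+ // Illustrative wave32 shape of the pattern: + // %Cond = i1 ISD::SETCC ult %v, 12 + // %VCMP = i32 AMDGPUISD::SETCC (zext %Cond), 0, setne ; the ballot + // Here %Cond is returned with Negate == false; the V_CMP selected for + // %Cond already produces the full wave mask the ballot would yield.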
+ auto VCMP_CC = cast(VCMP.getOperand(2))->get(); + auto *VCMP_CRHS = dyn_cast(VCMP.getOperand(1)); + if ((VCMP_CC == ISD::SETEQ || VCMP_CC == ISD::SETNE) && VCMP_CRHS && + VCMP_CRHS->isZero()) { + + auto Cond = VCMP.getOperand(0); + if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension. + Cond = Cond.getOperand(0); + + if (isBoolSGPR(Cond)) { + Negate = VCMP_CC == ISD::SETEQ; + return Cond; + } + } + return SDValue(); +} + void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { SDValue Cond = N->getOperand(1); @@ -2276,11 +2305,50 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { const SIRegisterInfo *TRI = ST->getRegisterInfo(); bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N); - unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ; + bool AndExec = !UseSCCBr; + bool Negate = false; + + if (Cond.getOpcode() == ISD::SETCC && + Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) { + SDValue VCMP = Cond->getOperand(0); + auto CC = cast(Cond->getOperand(2))->get(); + auto *CRHS = dyn_cast(Cond->getOperand(1)); + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && CRHS && CRHS->isZero() && + // TODO: make condition below an assert after fixing ballot bitwidth. + VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) { + // %VCMP = i(WaveSize) AMDGPUISD::SETCC ... + // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq + // BRCOND i1 %C, %BB + // => + // %VCMP = i(WaveSize) AMDGPUISD::SETCC ... + // VCC = COPY i(WaveSize) %VCMP + // S_CBRANCH_VCCNZ/VCCZ %BB + Negate = CC == ISD::SETEQ; + bool NegatedBallot = false; + if (auto BallotCond = combineBallotPattern(VCMP, NegatedBallot)) { + Cond = BallotCond; + UseSCCBr = !BallotCond->isDivergent(); + Negate = Negate ^ NegatedBallot; + } else { + // TODO: don't use SCC here assuming that AMDGPUISD::SETCC is always + // selected as V_CMP, but this may change for uniform condition. + Cond = VCMP; + UseSCCBr = false; + } + } + // Cond is either V_CMP resulted from AMDGPUISD::SETCC or a combination of + // V_CMPs resulted from ballot or ballot has uniform condition and SCC is + // used. + AndExec = false; + } + + unsigned BrOp = + UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1) + : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ); Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC(); SDLoc SL(N); - if (!UseSCCBr) { + if (AndExec) { // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not // analyzed what generates the vcc value, so we do not know whether vcc // bits for disabled lanes are 0. Thus we need to mask out bits for diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f4f4d095fba3ce..5c46d81f57af6a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10628,9 +10628,7 @@ SDValue SITargetLowering::splitBinaryBitConstantOp( return SDValue(); } -// Returns true if argument is a boolean value which is not serialized into -// memory or argument and does not require v_cndmask_b32 to be deserialized. 
-static bool isBoolSGPR(SDValue V) { +bool llvm::isBoolSGPR(SDValue V) { if (V.getValueType() != MVT::i1) return false; switch (V.getOpcode()) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index d717e12d29a514..746a88c5ea13a3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -586,6 +586,10 @@ class SITargetLowering final : public AMDGPUTargetLowering { getTargetMMOFlags(const Instruction &I) const override; }; +// Returns true if argument is a boolean value which is not serialized into +// memory or argument and does not require v_cndmask_b32 to be deserialized. +bool isBoolSGPR(SDValue V); + } // End namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c0e0ac1b4ec887..114d33b077866a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -515,6 +515,16 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, let HasExtVOP3DPP = 0; } +def IsPow2Plus1: PatLeaf<(i32 imm), [{ + uint32_t V = N->getZExtValue(); + return isPowerOf2_32(V - 1); +}]>; + +def Log2_32: SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + return CurDAG->getTargetConstant(Log2_32(V - 1), SDLoc(N), MVT::i32); +}]>; + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>; @@ -612,6 +622,10 @@ def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; +def : GCNPat< + (DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1), + (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; + let SubtargetPredicate = isGFX940Plus in def : GCNPat< (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 49ced893d5c734..50e4a8b87a5d2d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -330,7 +330,7 @@ Value *AA::getWithType(Value &V, Type &Ty) { if (C->getType()->isIntegerTy() && Ty.isIntegerTy()) return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true); if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy()) - return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true); + return ConstantFoldCastInstruction(Instruction::FPTrunc, C, &Ty); } } return nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 87181650e75872..22fd3edc39acb0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1841,10 +1841,11 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { // instcombined. if (ConstantFP *CFP = dyn_cast(RHS)) if (IsValidPromotion(FPType, LHSIntVal->getType())) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + Constant *CI = ConstantFoldCastOperand(Instruction::FPToSI, CFP, + LHSIntVal->getType(), DL); if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + ConstantFoldCastOperand(Instruction::SIToFP, CI, I.getType(), DL) == + CFP && willNotOverflowSignedAdd(LHSIntVal, CI, I)) { // Insert the new integer add.
Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index f06657c8cd7633..55e26d09cd6e82 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -7324,17 +7324,14 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, } // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or - // [0, UMAX], but it may still be fractional. See if it is fractional by - // casting the FP value to the integer value and back, checking for equality. + // [0, UMAX], but it may still be fractional. Check whether this is the case + // using the IsExact flag. // Don't do this for zero, because -0.0 is not fractional. - Constant *RHSInt = LHSUnsigned - ? ConstantExpr::getFPToUI(RHSC, IntTy) - : ConstantExpr::getFPToSI(RHSC, IntTy); + APSInt RHSInt(IntWidth, LHSUnsigned); + bool IsExact; + RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact); if (!RHS.isZero()) { - bool Equal = LHSUnsigned - ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC - : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; - if (!Equal) { + if (!IsExact) { // If we had a comparison against a fractional value, we have to adjust // the compare predicate and sometimes the value. RHSC is rounded towards // zero at this point. @@ -7400,7 +7397,7 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, // Lower this FP comparison into an appropriate integer version of the // comparison. - return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); + return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt)); } /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary. 
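A minimal standalone sketch (not part of the patch) of the IsExact idiom the InstCombineCompares hunk above relies on: APFloat::convertToInteger reports exactness directly, so the old fptoui/fptosi-and-back round trip through ConstantExpr is no longer needed to detect a fractional constant. The helper name below is illustrative only.

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"

// True iff F converts to a Width-bit (un)signed integer without losing
// information; fractional or out-of-range values report IsExact == false.
static bool convertsExactly(const llvm::APFloat &F, unsigned Width,
                            bool Unsigned) {
  llvm::APSInt Result(Width, Unsigned); // holds the truncated integer value
  bool IsExact;
  F.convertToInteger(Result, llvm::APFloat::rmTowardZero, &IsExact);
  return IsExact; // e.g. false for 1.5 (truncates to 1), true for 4.0
}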
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index cdcfb5050bff36..6220f850930969 100644 --- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -101,7 +101,7 @@ class LibCallsShrinkWrap : public InstVisitor { float Val) { Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val)); if (!Arg->getType()->isFloatTy()) - V = ConstantExpr::getFPExtend(V, Arg->getType()); + V = ConstantFoldCastInstruction(Instruction::FPExt, V, Arg->getType()); if (BBBuilder.GetInsertBlock()->getParent()->hasFnAttribute(Attribute::StrictFP)) BBBuilder.setIsFPConstrained(true); return BBBuilder.CreateFCmp(Cmp, Arg, V); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b425cc950f4678..4a5504c18f55b2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10080,7 +10080,15 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } if (!Vec) { Vec = SubVec; - assert(Part == 0 && "Expected firs part."); + assert((Part == 0 || all_of(seq(0, Part), + [&](unsigned P) { + ArrayRef SubMask = + Mask.slice(P * SliceSize, SliceSize); + return all_of(SubMask, [](int Idx) { + return Idx == PoisonMaskElem; + }); + })) && + "Expected first part or all previous parts masked."); copy(SubMask, VecMask.begin()); } else { unsigned VF = cast(Vec->getType())->getNumElements(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll index 8bd1be04650e00..d641063984eb81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll @@ -3,6 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.ballot.i32(i1) +declare i64 @llvm.amdgcn.ballot.i64(i1) declare i32 @llvm.ctpop.i32(i32) ; Test ballot(0) @@ -83,3 +84,391 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) { %bcnt = call i32 @llvm.ctpop.i32(i32 %ballot) ret i32 %bcnt } + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: 
s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 12, v0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; 
CHECK-NEXT: s_cbranch_scc1 .LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot64_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c 
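+ ; The 'and' of the two divergent compares is selected as a single s_and_b32 + ; of the V_CMP results (see the CHECK lines above), so no separate ballot + ; instruction is emitted.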
+ %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB20_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_cmp_le_i32 s0, 22 +; CHECK-NEXT: s_cbranch_scc1 .LBB21_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 
33 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %bc = icmp sgt i32 %ballot, 22 + br i1 %bc, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll index 9f83012f545750..d879779962dfca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll @@ -86,3 +86,339 @@ define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) { %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot) ret i64 %bcnt } + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: s_cmp_eq_u64 vcc, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: s_cmp_lg_u64 vcc, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_3: + %c = trunc i32 %v to i1 + %ballot = call i64 
@llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: s_cmp_eq_u64 vcc, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: s_cmp_lg_u64 vcc, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 
+; CHECK-NEXT: s_cbranch_scc1 .LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: 
s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, 1, 0 +; CHECK-NEXT: s_and_b32 s0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; CHECK-NEXT: v_cmp_le_i64_e64 vcc, s[0:1], 22 +; CHECK-NEXT: s_cbranch_vccnz .LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %bc = icmp sgt i64 %ballot, 22 + br i1 %bc, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll index e22cb912552f97..8cb9e9f992f546 100644 --- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll @@ -69,8 +69,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3 ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) - ; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 5 - ; REGALLOC-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec + ; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec ; REGALLOC-NEXT: KILL killed renamable $vgpr1 ; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll index 7e719e86b521fd..3337d053eb930b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -3,6 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s declare i32 @llvm.amdgcn.ballot.i32(i1) +declare i64 @llvm.amdgcn.ballot.i64(i1) declare i32 @llvm.ctpop.i32(i32) ; Test ballot(0) @@ -83,3 +84,496 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) { %bcnt = call i32 @llvm.ctpop.i32(i32 %ballot) ret i32 %bcnt } + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0 +; CHECK-NEXT: s_cbranch_vccz .LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB8_3 +; 
CHECK-NEXT: .LBB8_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0 +; CHECK-NEXT: s_cbranch_vccz .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_3: + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 12, v0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12 +; CHECK-NEXT: s_cbranch_vccz .LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_3: + %c 
= icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12 +; CHECK-NEXT: s_cbranch_vccz .LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; CHECK-NEXT: s_cbranch_vccz .LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot64_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 
s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, -1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, s0, exec_lo +; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; CHECK-NEXT: s_cbranch_vccz .LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, -1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, s0, exec_lo +; CHECK-NEXT: s_cbranch_scc0 .LBB20_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12 +; CHECK-NEXT: s_cmp_lt_i32 s0, 23 +; CHECK-NEXT: s_cbranch_scc1 .LBB21_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_3: + %c = icmp ult i32 %v, 12 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %bc = icmp sgt i32 %ballot, 22 + br i1 %bc, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32) + +define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; CHECK-NEXT: 
s_cbranch_vccnz .LBB22_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB22_3 +; CHECK-NEXT: .LBB22_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB22_3 +; CHECK-NEXT: .LBB22_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.icmp.i32(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { +; TODO: +; s_cmp_lt_u32 s0, 12 +; s_cselect_b32 s0, -1, 0 +; s_cmp_gt_u32 s1, 34 +; s_cselect_b32 s1, -1, 0 +; s_and_b32 s0, s0, s1 +; s_and_b32 s0, s0, exec_lo +; could be improved to: +; s_cmp_lt_u32 s0, 12 +; s_cselect_b32 s0, -1, 0 +; s_cmp_gt_u32 s1, 34 +; s_cselect_b32 s0, s0, 0 +; s_and_b32 s0, s0, exec_lo +; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec. +; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, -1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, s0, exec_lo +; CHECK-NEXT: s_cbranch_scc1 .LBB23_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB23_3 +; CHECK-NEXT: .LBB23_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB23_3 +; CHECK-NEXT: .LBB23_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.icmp.i32(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_ne_zero = icmp ne i32 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; CHECK-NEXT: s_cbranch_vccnz .LBB24_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB24_3 +; CHECK-NEXT: .LBB24_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB24_3 +; CHECK-NEXT: .LBB24_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.icmp.i32(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b32 s1, -1, 0 +; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 s0, s0, exec_lo +; CHECK-NEXT: s_cbranch_scc1 .LBB25_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB25_3 +; CHECK-NEXT: .LBB25_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB25_3 +; CHECK-NEXT: .LBB25_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 
34 + %c = and i1 %v1c, %v2c + %ballot = call i32 @llvm.amdgcn.icmp.i32(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_eq_zero = icmp eq i32 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index 473159ec20e552..217f930a643da7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -86,3 +86,428 @@ define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) { %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot) ret i64 %bcnt } + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB7_3 +; CHECK-NEXT: .LBB7_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 +; CHECK-NEXT: s_cbranch_vccz .LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB8_3 +; CHECK-NEXT: .LBB8_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB9_3 +; CHECK-NEXT: .LBB9_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 +; CHECK-NEXT: s_cbranch_vccz .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_3: + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 
@branch_divergent_ballot_ne_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB11_3 +; CHECK-NEXT: .LBB11_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 +; CHECK-NEXT: s_cbranch_vccz .LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB12_3 +; CHECK-NEXT: .LBB12_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB13_3 +; CHECK-NEXT: .LBB13_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 +; CHECK-NEXT: s_cbranch_vccz .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CHECK-NEXT: s_cbranch_vccz .LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB15_3 +; CHECK-NEXT: .LBB15_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 
inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_cbranch_scc0 .LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB16_3 +; CHECK-NEXT: .LBB16_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CHECK-NEXT: s_cbranch_vccz .LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB17_3 +; CHECK-NEXT: .LBB17_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) { +; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, 12 +; CHECK-NEXT: v_cmp_lt_i64_e64 vcc, s[0:1], 23 +; CHECK-NEXT: s_cbranch_vccnz .LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB19_3 +; CHECK-NEXT: .LBB19_3: + %c = icmp ult i32 %v, 12 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %bc = icmp sgt i64 %ballot, 22 + br i1 %bc, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +declare i64 @llvm.amdgcn.icmp.i64(i1, i1, i32) + +define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: 
branch_divergent_simulated_negated_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB20_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB20_3 +; CHECK-NEXT: .LBB20_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_cbranch_scc1 .LBB21_2 +; CHECK-NEXT: ; %bb.1: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB21_3 +; CHECK-NEXT: .LBB21_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_ne_zero = icmp ne i64 %ballot, 0 + br i1 %ballot_ne_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) { +; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0 +; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB22_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB22_3 +; CHECK-NEXT: .LBB22_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB22_3 +; CHECK-NEXT: .LBB22_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + %ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} + +define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) { +; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_cbranch_scc1 .LBB23_2 +; CHECK-NEXT: ; %bb.1: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB23_3 +; CHECK-NEXT: .LBB23_2: ; %true +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: s_branch .LBB23_3 +; CHECK-NEXT: .LBB23_3: + %v1c = icmp ult i32 %v1, 12 + %v2c = icmp ugt i32 %v2, 34 + %c = and i1 %v1c, %v2c + %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32 + 
%ballot_eq_zero = icmp eq i64 %ballot, 0 + br i1 %ballot_eq_zero, label %true, label %false +true: + ret i32 42 +false: + ret i32 33 +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll index dc7d2eed53696e..fd543b96b0cf36 100644 --- a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll +++ b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; OBJ: Relocations [ @@ -54,32 +55,32 @@ bb3: define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 { ; GCN-LABEL: uniform_conditional_min_long_forward_branch: ; GCN: ; %bb.0: ; %bb0 -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s2, 0 -; GCN-NEXT: s_cbranch_scc0 .LBB1_1 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cbranch_scc0 .LBB1_1 ; GCN-NEXT: .LBB1_3: ; %bb0 -; GCN-NEXT: s_getpc_b64 s[8:9] +; GCN-NEXT: s_getpc_b64 s[8:9] ; GCN-NEXT: .Lpost_getpc0: -; GCN-NEXT: s_add_u32 s8, s8, (.LBB1_2-.Lpost_getpc0)&4294967295 -; GCN-NEXT: s_addc_u32 s9, s9, (.LBB1_2-.Lpost_getpc0)>>32 -; GCN-NEXT: s_setpc_b64 s[8:9] +; GCN-NEXT: s_add_u32 s8, s8, (.LBB1_2-.Lpost_getpc0)&4294967295 +; GCN-NEXT: s_addc_u32 s9, s9, (.LBB1_2-.Lpost_getpc0)>>32 +; GCN-NEXT: s_setpc_b64 s[8:9] ; GCN-NEXT: .LBB1_1: ; %bb2 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: v_nop_e64 -; GCN-NEXT: v_nop_e64 -; GCN-NEXT: v_nop_e64 -; GCN-NEXT: v_nop_e64 -; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB1_2: ; %bb3 -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s7, 0xf000 -; GCN-NEXT: s_mov_b32 s6, -1 -; GCN-NEXT: v_mov_b32_e32 v0, s2 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_endpgm +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm bb0: %cmp = icmp eq i32 %cnd, 0 br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch @@ -290,7 +291,7 @@ define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr add ; GCN-NEXT: v_nop_e64 ; GCN-NEXT: v_nop_e64 ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 vcc, exec +; GCN-NEXT: s_mov_b64 vcc, exec ; GCN-NEXT: s_cbranch_execnz .LBB5_5 ; GCN-NEXT: .LBB5_9: ; %bb3 ; GCN-NEXT: s_getpc_b64 s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index da7645d5011fc9..179b0a855182ce 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -2644,6 +2644,45 @@ entry: ret void } +define i32 @mul_pow2_plus_1(i32 %val) { +; SI-LABEL: mul_pow2_plus_1: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_mul_lo_u32 v0, v0, 9 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: mul_pow2_plus_1: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mul_lo_u32 v0, v0, 9 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: mul_pow2_plus_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 3, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: mul_pow2_plus_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: mul_pow2_plus_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_lshl_add_u32 v0, v0, 3, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; EG-LABEL: mul_pow2_plus_1: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD + %mul = mul i32 %val, 9 + ret i32 %mul +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index 60f309c0c1882f..963575d2acc51d 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -141,9 +141,9 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: bb.2.Flow: ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI undef %32:vgpr_32, %bb.1, %10, %bb.4 - ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI undef %33:vgpr_32, %bb.1, %9, %bb.4 - ; SI-NEXT: [[PHI4:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, undef %35:vgpr_32, %bb.4 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI undef %31:vgpr_32, %bb.1, %10, %bb.4 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI undef %32:vgpr_32, %bb.1, %9, %bb.4 + ; SI-NEXT: [[PHI4:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, undef %34:vgpr_32, %bb.4 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} @@ -158,7 +158,7 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: successors: %bb.2(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY2]], 0, [[PHI1]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[PHI1]], 3, implicit $exec + ; SI-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 killed [[PHI1]], 1, [[PHI1]], implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.5.if.end: diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll index c71dc06c68d8d6..9183f043f052cb 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll @@ -92,20 +92,20 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: .LBB2_1: ; %if.end ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; SI-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; SI-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; SI-NEXT: v_add_nc_u32_e32 v2, 1, v3 ; SI-NEXT: s_add_i32 s1, s1, 1 ; SI-NEXT: s_cmp_lt_i32 s1, s0 ; SI-NEXT: s_cbranch_scc0 .LBB2_6 ; SI-NEXT: .LBB2_2: ; %for.body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: ; implicit-def: $vgpr0 ; SI-NEXT: ; implicit-def: $vgpr3 +; SI-NEXT: ; implicit-def: $vgpr0 ; SI-NEXT: s_and_saveexec_b32 s2, vcc_lo ; SI-NEXT: s_xor_b32 s2, exec_lo, s2 ; SI-NEXT: ; %bb.3: ; %else ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; 
SI-NEXT: v_mul_lo_u32 v0, v2, 3 -; SI-NEXT: v_mul_f32_e32 v3, v1, v2 +; SI-NEXT: v_mul_f32_e32 v0, v1, v2 +; SI-NEXT: v_lshl_add_u32 v3, v2, 1, v2 ; SI-NEXT: ; implicit-def: $vgpr2 ; SI-NEXT: ; %bb.4: ; %Flow ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1 @@ -113,11 +113,11 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: s_cbranch_execz .LBB2_1 ; SI-NEXT: ; %bb.5: ; %if ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; SI-NEXT: v_mul_f32_e32 v3, s1, v1 -; SI-NEXT: v_add_nc_u32_e32 v0, 1, v2 +; SI-NEXT: v_mul_f32_e32 v0, s1, v1 +; SI-NEXT: v_add_nc_u32_e32 v3, 1, v2 ; SI-NEXT: s_branch .LBB2_1 ; SI-NEXT: .LBB2_6: ; %for.end -; SI-NEXT: v_add_f32_e32 v0, v0, v3 +; SI-NEXT: v_add_f32_e32 v0, v3, v0 ; SI-NEXT: ; return to shader part epilog entry: ; %break = icmp sgt i32 %bound, 0 diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 95f947cbca14f0..6bb066f06dd9a2 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -1536,7 +1536,7 @@ define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i3 ; GFX9-W64-NEXT: ; %bb.2: ; %Flow ; GFX9-W64-NEXT: s_andn2_saveexec_b64 s[14:15], s[14:15] ; GFX9-W64-NEXT: ; %bb.3: ; %IF -; GFX9-W64-NEXT: v_mul_lo_u32 v0, v5, 3 +; GFX9-W64-NEXT: v_lshl_add_u32 v0, v5, 1, v5 ; GFX9-W64-NEXT: ; %bb.4: ; %END ; GFX9-W64-NEXT: s_or_b64 exec, exec, s[14:15] ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13] @@ -1566,7 +1566,7 @@ define amdgpu_ps <4 x float> @test_control_flow_2(<8 x i32> inreg %rsrc, <4 x i3 ; GFX10-W32-NEXT: ; %bb.2: ; %Flow ; GFX10-W32-NEXT: s_andn2_saveexec_b32 s13, s13 ; GFX10-W32-NEXT: ; %bb.3: ; %IF -; GFX10-W32-NEXT: v_mul_lo_u32 v0, v5, 3 +; GFX10-W32-NEXT: v_lshl_add_u32 v0, v5, 1, v5 ; GFX10-W32-NEXT: ; %bb.4: ; %END ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s13 ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12 diff --git a/llvm/test/DebugInfo/Generic/arrays-need-types.ll b/llvm/test/DebugInfo/Generic/arrays-need-types.ll new file mode 100644 index 00000000000000..a1b7c963d38443 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/arrays-need-types.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -o - -S --passes=verify 2>&1 | FileCheck %s + +; CHECK: array types must have a base type +; CHECK-NEXT: !DICompositeType(tag: DW_TAG_array_type, +; CHECK-NEXT: warning: ignoring invalid debug info + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +define i32 @func(ptr %0) !dbg !3 { + call void @llvm.dbg.value(metadata ptr %0, metadata !6, metadata !DIExpression()), !dbg !10 + ret i32 0 +} + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C11, file: !2, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!2 = !DIFile(filename: "file.c", directory: "/") +!3 = distinct !DISubprogram(name: "func", scope: !2, file: !2, line: 46, type: !4, scopeLine: 48, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) +!4 = distinct !DISubroutineType(types: !5) +!5 = !{} +!6 = !DILocalVariable(name: "op", arg: 5, scope: !3, file: !2, line: 47, type: !7) +!7 = !DICompositeType(tag: DW_TAG_array_type, size: 2624, elements: !8) +!8 = !{!9} +!9 = !DISubrange(count: 41) +!10 = !DILocation(line: 0, scope: !3) diff --git a/llvm/test/Verifier/alias.ll b/llvm/test/Verifier/alias.ll index 8d755ec6a91c6f..42758cf2c7f37b 100644 --- a/llvm/test/Verifier/alias.ll +++ b/llvm/test/Verifier/alias.ll @@ -1,5 +1,7 @@ -; RUN: not llvm-as %s 
-o /dev/null 2>&1 | FileCheck %s --implicit-check-not=alias --implicit-check-not=Alias +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; CHECK: : assembly parsed, but does not verify as correct! +; CHECK-NOT: {{(^A| a)lias(es)? }} declare void @f() @fa = alias void (), ptr @f diff --git a/mlir/include/mlir-c/BuiltinTypes.h b/mlir/include/mlir-c/BuiltinTypes.h index a6d8e10efbde92..1fd5691f41eec3 100644 --- a/mlir/include/mlir-c/BuiltinTypes.h +++ b/mlir/include/mlir-c/BuiltinTypes.h @@ -271,6 +271,32 @@ MLIR_CAPI_EXPORTED MlirType mlirVectorTypeGetChecked(MlirLocation loc, const int64_t *shape, MlirType elementType); +/// Creates a scalable vector type with the shape identified by its rank and +/// dimensions. A subset of dimensions may be marked as scalable via the +/// corresponding flag list, which is expected to have as many entries as the +/// rank of the vector. The vector is created in the same context as the element +/// type. +MLIR_CAPI_EXPORTED MlirType mlirVectorTypeGetScalable(intptr_t rank, + const int64_t *shape, + const bool *scalable, + MlirType elementType); + +/// Same as "mlirVectorTypeGetScalable" but returns a nullptr wrapping MlirType +/// on illegal arguments, emitting appropriate diagnostics. +MLIR_CAPI_EXPORTED +MlirType mlirVectorTypeGetScalableChecked(MlirLocation loc, intptr_t rank, + const int64_t *shape, + const bool *scalable, + MlirType elementType); + +/// Checks whether the given vector type is scalable, i.e., has at least one +/// scalable dimension. +MLIR_CAPI_EXPORTED bool mlirVectorTypeIsScalable(MlirType type); + +/// Checks whether the "dim"-th dimension of the given vector is scalable. +MLIR_CAPI_EXPORTED bool mlirVectorTypeIsDimScalable(MlirType type, + intptr_t dim); + //===----------------------------------------------------------------------===// // Ranked / Unranked Tensor type. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index bbed1ea5cf6220..06df4a601b7a3f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -209,8 +209,7 @@ class GEPIndicesAdaptor { /// global and use it to compute the address of the first character in the /// string (operations inserted at the builder insertion point). Value createGlobalString(Location loc, OpBuilder &builder, StringRef name, - StringRef value, Linkage linkage, - bool useOpaquePointers = true); + StringRef value, Linkage linkage); /// LLVM requires some operations to be inside of a Module operation. This /// function confirms that the Operation has the desired properties. 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 72c932ac07a2e1..1123466b7a75e3 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -469,16 +469,16 @@ def LLVM_ThreadlocalAddressOp : LLVM_OneResultIntrOp<"threadlocal.address", [], def LLVM_CoroIdOp : LLVM_IntrOp<"coro.id", [], [], [], 1> { let arguments = (ins I32:$align, - LLVM_i8Ptr:$promise, - LLVM_i8Ptr:$coroaddr, - LLVM_i8Ptr:$fnaddrs); + LLVM_AnyPointer:$promise, + LLVM_AnyPointer:$coroaddr, + LLVM_AnyPointer:$fnaddrs); let assemblyFormat = "$align `,` $promise `,` $coroaddr `,` $fnaddrs" " attr-dict `:` functional-type(operands, results)"; } def LLVM_CoroBeginOp : LLVM_IntrOp<"coro.begin", [], [], [], 1> { let arguments = (ins LLVM_TokenType:$token, - LLVM_i8Ptr:$mem); + LLVM_AnyPointer:$mem); let assemblyFormat = "$token `,` $mem attr-dict `:` functional-type(operands, results)"; } @@ -491,7 +491,7 @@ def LLVM_CoroAlignOp : LLVM_IntrOp<"coro.align", [0], [], [], 1> { } def LLVM_CoroSaveOp : LLVM_IntrOp<"coro.save", [], [], [], 1> { - let arguments = (ins LLVM_i8Ptr:$handle); + let arguments = (ins LLVM_AnyPointer:$handle); let assemblyFormat = "$handle attr-dict `:` functional-type(operands, results)"; } @@ -502,7 +502,7 @@ def LLVM_CoroSuspendOp : LLVM_IntrOp<"coro.suspend", [], [], [], 1> { } def LLVM_CoroEndOp : LLVM_IntrOp<"coro.end", [], [], [], 1> { - let arguments = (ins LLVM_i8Ptr:$handle, + let arguments = (ins LLVM_AnyPointer:$handle, I1:$unwind, LLVM_TokenType:$retvals); let assemblyFormat = "$handle `,` $unwind `,` $retvals attr-dict `:` functional-type(operands, results)"; @@ -510,12 +510,12 @@ def LLVM_CoroEndOp : LLVM_IntrOp<"coro.end", [], [], [], 1> { def LLVM_CoroFreeOp : LLVM_IntrOp<"coro.free", [], [], [], 1> { let arguments = (ins LLVM_TokenType:$id, - LLVM_i8Ptr:$handle); + LLVM_AnyPointer:$handle); let assemblyFormat = "$id `,` $handle attr-dict `:` functional-type(operands, results)"; } def LLVM_CoroResumeOp : LLVM_IntrOp<"coro.resume", [], [], [], 0> { - let arguments = (ins LLVM_i8Ptr:$handle); + let arguments = (ins LLVM_AnyPointer:$handle); let assemblyFormat = "$handle attr-dict `:` qualified(type($handle))"; } @@ -591,19 +591,19 @@ def LLVM_DbgLabelOp : LLVM_IntrOp<"dbg.label", [], [], [], 0> { // def LLVM_VaStartOp : LLVM_ZeroResultIntrOp<"vastart">, - Arguments<(ins LLVM_i8Ptr:$arg_list)> { + Arguments<(ins LLVM_AnyPointer:$arg_list)> { let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))"; let summary = "Initializes `arg_list` for subsequent variadic argument extractions."; } def LLVM_VaCopyOp : LLVM_ZeroResultIntrOp<"vacopy">, - Arguments<(ins LLVM_i8Ptr:$dest_list, LLVM_i8Ptr:$src_list)> { + Arguments<(ins LLVM_AnyPointer:$dest_list, LLVM_AnyPointer:$src_list)> { let assemblyFormat = "$src_list `to` $dest_list attr-dict `:` type(operands)"; let summary = "Copies the current argument position from `src_list` to `dest_list`."; } def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend">, - Arguments<(ins LLVM_i8Ptr:$arg_list)> { + Arguments<(ins LLVM_AnyPointer:$arg_list)> { let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))"; let summary = "Destroys `arg_list`, which has been initialized by `intr.vastart` or `intr.vacopy`."; } @@ -613,7 +613,7 @@ def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend">, // def LLVM_EhTypeidForOp : LLVM_OneResultIntrOp<"eh.typeid.for"> { - let arguments = (ins LLVM_i8Ptr:$type_info); + let 
arguments = (ins LLVM_AnyPointer:$type_info); let assemblyFormat = "$type_info attr-dict `:` functional-type(operands, results)"; } @@ -927,12 +927,12 @@ def LLVM_PtrAnnotation : LLVM_OneResultIntrOp<"ptr.annotation", [0], [2], [AllTypesMatch<["res", "ptr"]>, AllTypesMatch<["annotation", "fileName", "attr"]>]> { - let arguments = (ins LLVM_PointerTo:$ptr, + let arguments = (ins LLVM_AnyPointer:$ptr, LLVM_AnyPointer:$annotation, LLVM_AnyPointer:$fileName, I32:$line, LLVM_AnyPointer:$attr); - let results = (outs LLVM_PointerTo:$res); + let results = (outs LLVM_AnyPointer:$res); } def LLVM_Annotation diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 503988abfc090a..4e42a0e46d9bf9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -55,43 +55,18 @@ def LLVM_AnyFloat : Type< def LLVM_AnyPointer : Type($_self)">, "LLVM pointer type", "::mlir::LLVM::LLVMPointerType">; -def LLVM_OpaquePointer : Type< +// Pointer in a given address space. +class LLVM_PointerInAddressSpace : Type< And<[LLVM_AnyPointer.predicate, - CPred<"::llvm::cast<::mlir::LLVM::LLVMPointerType>($_self).isOpaque()">]>, - "LLVM opaque pointer", "::mlir::LLVM::LLVMPointerType">; - -// Type constraint accepting LLVM pointer type with an additional constraint -// on the element type. -class LLVM_PointerTo : Type< - And<[LLVM_AnyPointer.predicate, - Or<[LLVM_OpaquePointer.predicate, - SubstLeaves< - "$_self", - "::llvm::cast<::mlir::LLVM::LLVMPointerType>($_self).getElementType()", - pointee.predicate>]>]>, - "LLVM pointer to " # pointee.summary, "::mlir::LLVM::LLVMPointerType">; - -// Opaque pointer in a given address space. -class LLVM_OpaquePointerInAddressSpace : Type< - And<[LLVM_OpaquePointer.predicate, - CPred< - "::llvm::cast<::mlir::LLVM::LLVMPointerType>($_self).getAddressSpace() == " - # addressSpace>]>, - "Opaque LLVM pointer in address space " # addressSpace, + CPred< + "::llvm::cast<::mlir::LLVM::LLVMPointerType>($_self).getAddressSpace() == " + # addressSpace>]>, + "LLVM pointer in address space " # addressSpace, "::mlir::LLVM::LLVMPointerType"> { let builderCall = "$_builder.getType<::mlir::LLVM::LLVMPointerType>(" # addressSpace # ")"; } -// Type constraints accepting LLVM pointer type to integer of a specific width. -class LLVM_IntPtrBase : Type< - And<[LLVM_PointerTo>.predicate, - CPred<"::llvm::cast<::mlir::LLVM::LLVMPointerType>($_self).getAddressSpace()" - " == " # addressSpace>]>, - "LLVM pointer to " # I.summary>; - -def LLVM_i8Ptr : LLVM_IntPtrBase<8>; - // Type constraint accepting any LLVM structure type. def LLVM_AnyStruct : Type($_self)">, "LLVM structure type">; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index c8549f146d0297..cb8a6e08886df9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -184,16 +184,14 @@ def LLVM_AllocaOp : LLVM_Op<"alloca", LLVM_MemOpPatterns { let arguments = (ins AnyInteger:$arraySize, OptionalAttr:$alignment, - OptionalAttr:$elem_type, + TypeAttr:$elem_type, UnitAttr:$inalloca); let results = (outs Res]>:$res); string llvmInstName = "Alloca"; string llvmBuilder = [{ auto addrSpace = $_resultType->getPointerAddressSpace(); - llvm::Type *elementType = moduleTranslation.convertType( - $elem_type ? 
*$elem_type - : ::llvm::cast(op.getType()).getElementType()); + llvm::Type *elementType = moduleTranslation.convertType($elem_type); auto *inst = builder.CreateAlloca(elementType, addrSpace, $arraySize); }] # setAlignmentCode # [{ inst->setUsedWithInAlloca($inalloca); @@ -207,31 +205,16 @@ def LLVM_AllocaOp : LLVM_Op<"alloca", $res = $_builder.create( $_location, $_resultType, $arraySize, alignment == 0 ? IntegerAttr() : $_builder.getI64IntegerAttr(alignment), - TypeAttr::get(allocatedType), allocaInst->isUsedWithInAlloca()); + allocatedType, allocaInst->isUsedWithInAlloca()); }]; let builders = [ - DeprecatedOpBuilder<"the usage of typed pointers is deprecated", - (ins "Type":$resultType, "Value":$arraySize, - "unsigned":$alignment), - [{ - assert(!::llvm::cast(resultType).isOpaque() && - "pass the allocated type explicitly if opaque pointers are used"); - if (alignment == 0) - return build($_builder, $_state, resultType, arraySize, IntegerAttr(), - TypeAttr(), false); - build($_builder, $_state, resultType, arraySize, - $_builder.getI64IntegerAttr(alignment), TypeAttr(), false); - }]>, OpBuilder<(ins "Type":$resultType, "Type":$elementType, "Value":$arraySize, CArg<"unsigned", "0">:$alignment), [{ - TypeAttr elemTypeAttr = - ::llvm::cast(resultType).isOpaque() ? - TypeAttr::get(elementType) : TypeAttr(); build($_builder, $_state, resultType, arraySize, alignment == 0 ? IntegerAttr() : $_builder.getI64IntegerAttr(alignment), - elemTypeAttr, false); + elementType, false); }]> ]; @@ -247,7 +230,7 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, let arguments = (ins LLVM_ScalarOrVectorOf:$base, Variadic>:$dynamicIndices, DenseI32ArrayAttr:$rawConstantIndices, - OptionalAttr:$elem_type, + TypeAttr:$elem_type, UnitAttr:$inbounds); let results = (outs LLVM_ScalarOrVectorOf:$res); let skipDefaultBuilders = 1; @@ -282,14 +265,6 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, OpBuilder<(ins "Type":$resultType, "Type":$basePtrType, "Value":$basePtr, "ValueRange":$indices, CArg<"bool", "false">:$inbounds, CArg<"ArrayRef", "{}">:$attributes)>, - DeprecatedOpBuilder<"the usage of typed pointers is deprecated", - (ins "Type":$resultType, "Value":$basePtr, - "ValueRange":$indices, CArg<"bool", "false">:$inbounds, - CArg<"ArrayRef", "{}">:$attributes)>, - DeprecatedOpBuilder<"the usage of typed pointers is deprecated", - (ins "Type":$resultType, "Value":$basePtr, - "ArrayRef":$indices, CArg<"bool", "false">:$inbounds, - CArg<"ArrayRef", "{}">:$attributes)>, OpBuilder<(ins "Type":$resultType, "Type":$basePtrType, "Value":$basePtr, "ArrayRef":$indices, CArg<"bool", "false">:$inbounds, CArg<"ArrayRef", "{}">:$attributes)>, @@ -306,22 +281,19 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, indices.push_back( builder.getInt32(valueOrAttr.get().getInt())); } - Type baseElementType = op.getSourceElementType(); + Type baseElementType = op.getElemType(); llvm::Type *elementType = moduleTranslation.convertType(baseElementType); $res = builder.CreateGEP(elementType, $base, indices, "", $inbounds); }]; let assemblyFormat = [{ (`inbounds` $inbounds^)? $base `[` custom($dynamicIndices, $rawConstantIndices) `]` attr-dict - `:` functional-type(operands, results) (`,` $elem_type^)? + `:` functional-type(operands, results) `,` $elem_type }]; let extraClassDeclaration = [{ constexpr static int32_t kDynamicIndex = std::numeric_limits::min(); - /// Returns the type pointed to by the pointer argument of this GEP. 
- Type getSourceElementType(); - GEPIndicesAdaptor getIndices(); }]; let hasFolder = 1; @@ -332,7 +304,7 @@ def LLVM_LoadOp : LLVM_MemAccessOpBase<"load", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]> { - dag args = (ins LLVM_PointerTo:$addr, + dag args = (ins LLVM_AnyPointer:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal, @@ -370,7 +342,7 @@ def LLVM_LoadOp : LLVM_MemAccessOpBase<"load", let assemblyFormat = [{ (`volatile` $volatile_^)? $addr (`atomic` (`syncscope` `(` $syncscope^ `)`)? $ordering^)? - attr-dict `:` custom(type($addr), type($res)) + attr-dict `:` qualified(type($addr)) `->` type($res) }]; string llvmBuilder = [{ auto *inst = builder.CreateLoad($_resultType, $addr, $volatile_); @@ -391,9 +363,6 @@ def LLVM_LoadOp : LLVM_MemAccessOpBase<"load", getLLVMSyncScope(loadInst)); }]; let builders = [ - DeprecatedOpBuilder<"the usage of typed pointers is deprecated", - (ins "Value":$addr, CArg<"unsigned", "0">:$alignment, - CArg<"bool", "false">:$isVolatile, CArg<"bool", "false">:$isNonTemporal)>, OpBuilder<(ins "Type":$type, "Value":$addr, CArg<"unsigned", "0">:$alignment, CArg<"bool", "false">:$isVolatile, CArg<"bool", "false">:$isNonTemporal, @@ -408,7 +377,7 @@ def LLVM_StoreOp : LLVM_MemAccessOpBase<"store", DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]> { dag args = (ins LLVM_LoadableType:$value, - LLVM_PointerTo:$addr, + LLVM_AnyPointer:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal, @@ -445,7 +414,7 @@ def LLVM_StoreOp : LLVM_MemAccessOpBase<"store", let assemblyFormat = [{ (`volatile` $volatile_^)? $value `,` $addr (`atomic` (`syncscope` `(` $syncscope^ `)`)? $ordering^)? - attr-dict `:` custom(type($value), type($addr)) + attr-dict `:` type($value) `,` qualified(type($addr)) }]; string llvmBuilder = [{ auto *inst = builder.CreateStore($value, $addr, $volatile_); @@ -651,8 +620,7 @@ def LLVM_CallOp : LLVM_MemAccessOpBase<"call", OpBuilder<(ins "LLVMFunctionType":$calleeType, "FlatSymbolRefAttr":$callee, CArg<"ValueRange", "{}">:$args)>, OpBuilder<(ins "LLVMFunctionType":$calleeType, "StringRef":$callee, - CArg<"ValueRange", "{}">:$args)>, - OpBuilder<(ins "Value":$callee, "ValueRange":$args)> + CArg<"ValueRange", "{}">:$args)> ]; let hasCustomAssemblyFormat = 1; let extraClassDeclaration = [{ @@ -1636,7 +1604,7 @@ def LLVM_AtomicRMWOp : LLVM_MemAccessOpBase<"atomicrmw", [ TypesMatchWith<"result #0 and operand #1 have the same type", "val", "res", "$_self">]> { dag args = (ins AtomicBinOp:$bin_op, - LLVM_PointerTo:$ptr, + LLVM_AnyPointer:$ptr, LLVM_AtomicRMWType:$val, AtomicOrdering:$ordering, OptionalAttr:$syncscope, OptionalAttr:$alignment, @@ -1687,7 +1655,7 @@ def LLVM_AtomicCmpXchgOp : LLVM_MemAccessOpBase<"cmpxchg", [ TypesMatchWith<"result #0 has an LLVM struct type consisting of " "the type of operand #2 and a bool", "val", "res", "getValAndBoolStructType($_self)">]> { - dag args = (ins LLVM_PointerTo:$ptr, + dag args = (ins LLVM_AnyPointer:$ptr, LLVM_AtomicCmpXchgType:$cmp, LLVM_AtomicCmpXchgType:$val, AtomicOrdering:$success_ordering, AtomicOrdering:$failure_ordering, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td index e31029bfed5a54..0bd068c1be7c90 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td @@ -137,30 +137,17 @@ def LLVMPointerType : LLVMType<"LLVMPointer", "ptr", [ ``` }]; - let parameters = (ins DefaultValuedParameter<"Type", 
"Type()">:$elementType, - DefaultValuedParameter<"unsigned", "0">:$addressSpace); + let parameters = (ins DefaultValuedParameter<"unsigned", "0">:$addressSpace); let assemblyFormat = [{ - (`<` custom($elementType, $addressSpace)^ `>`)? + (`<` $addressSpace^ `>`)? }]; - let genVerifyDecl = 1; - + let skipDefaultBuilders = 1; let builders = [ - TypeBuilderWithInferredContext<(ins "Type":$elementType, - CArg<"unsigned", "0">:$addressSpace)>, TypeBuilder<(ins CArg<"unsigned", "0">:$addressSpace), [{ - return $_get($_ctxt, Type(), addressSpace); + return $_get($_ctxt, addressSpace); }]> ]; - - let extraClassDeclaration = [{ - /// Returns `true` if this type is the opaque pointer type, i.e., it has no - /// pointed-to type. - bool isOpaque() const { return !getElementType(); } - - /// Checks if the given type can have a pointer type pointing to it. - static bool isValidElementType(Type type); - }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index c49decde1638b1..ffe6f25fcd944b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -19,10 +19,8 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td" -def LLVM_i8Ptr_global : LLVM_IntPtrBase<8, 1>; -def LLVM_i8Ptr_shared : LLVM_IntPtrBase<8, 3>; -def LLVM_i64ptr_any : LLVM_IntPtrBase<64>; -def LLVM_i64ptr_shared : LLVM_IntPtrBase<64, 3>; +def LLVM_PointerGlobal : LLVM_PointerInAddressSpace<1>; +def LLVM_PointerShared : LLVM_PointerInAddressSpace<3>; //===----------------------------------------------------------------------===// // NVVM dialect definitions @@ -213,7 +211,7 @@ def NVVM_ReduxOp : /// mbarrier.init instruction with generic pointer type def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$count, PtxPredicate:$predicate)> { + Arguments<(ins LLVM_AnyPointer:$addr, I32:$count, PtxPredicate:$predicate)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init, {$addr, $count}); }]; @@ -228,7 +226,7 @@ def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">, /// mbarrier.init instruction with shared pointer type def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared">, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$count, PtxPredicate:$predicate)> { + Arguments<(ins LLVM_PointerShared:$addr, I32:$count, PtxPredicate:$predicate)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init_shared, {$addr, $count}); }]; @@ -240,7 +238,7 @@ def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared">, } def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">, - Arguments<(ins LLVM_i64ptr_any:$addr)> { + Arguments<(ins LLVM_AnyPointer:$addr)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_inval, {$addr}); }]; @@ -248,7 +246,7 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">, } def NVVM_MBarrierInvalSharedOp : NVVM_Op<"mbarrier.inval.shared">, - Arguments<(ins LLVM_i64ptr_shared:$addr)> { + Arguments<(ins LLVM_PointerShared:$addr)> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_inval_shared, {$addr}); }]; @@ -257,7 +255,7 @@ def NVVM_MBarrierInvalSharedOp : NVVM_Op<"mbarrier.inval.shared">, def NVVM_MBarrierArriveOp : 
NVVM_Op<"mbarrier.arrive">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_any:$addr)> { + Arguments<(ins LLVM_AnyPointer:$addr)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr}); }]; @@ -266,16 +264,16 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">, def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_shared:$addr)> { + Arguments<(ins LLVM_PointerShared:$addr)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr}); }]; - let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)"; + let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)"; } def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$count)> { + Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count}); }]; @@ -284,7 +282,7 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">, def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$count)> { + Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count}); }]; @@ -292,7 +290,7 @@ def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete. } def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$txcount, PtxPredicate:$predicate)> { + Arguments<(ins LLVM_AnyPointer:$addr, I32:$txcount, PtxPredicate:$predicate)> { let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { return std::string("mbarrier.arrive.expect_tx.b64 _, [%0], %1;"); } @@ -300,7 +298,7 @@ def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_t } def NVVM_MBarrierArriveExpectTxSharedOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx.shared">, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$txcount, PtxPredicate:$predicate)> { + Arguments<(ins LLVM_PointerShared:$addr, I32:$txcount, PtxPredicate:$predicate)> { let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? 
attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { return std::string("mbarrier.arrive.expect_tx.shared.b64 _, [%0], %1;"); } @@ -308,7 +306,7 @@ def NVVM_MBarrierArriveExpectTxSharedOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.ex } def NVVM_MBarrierTryWaitParityOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity">, - Arguments<(ins LLVM_i64ptr_any:$addr, I32:$phase, I32:$ticks)> { + Arguments<(ins LLVM_AnyPointer:$addr, I32:$phase, I32:$ticks)> { let assemblyFormat = "$addr `,` $phase `,` $ticks attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { @@ -327,7 +325,7 @@ def NVVM_MBarrierTryWaitParityOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity" } def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity.shared">, - Arguments<(ins LLVM_i64ptr_shared:$addr, I32:$phase, I32:$ticks)> { + Arguments<(ins LLVM_PointerShared:$addr, I32:$phase, I32:$ticks)> { let assemblyFormat = "$addr `,` $phase `,` $ticks attr-dict `:` type(operands)"; let extraClassDefinition = [{ std::string $cppClass::getPtx() { @@ -347,7 +345,7 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_any:$addr, LLVM_Type:$state)> { + Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state}); }]; @@ -356,7 +354,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_i64ptr_shared:$addr, LLVM_Type:$state)> { + Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> { string llvmBuilder = [{ $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state}); }]; @@ -501,8 +499,8 @@ def LoadCacheModifierKind : I32EnumAttr<"LoadCacheModifierKind", def LoadCacheModifierAttr : EnumAttr; def NVVM_CpAsyncOp : NVVM_PTXBuilder_Op<"cp.async.shared.global">, - Arguments<(ins LLVM_i8Ptr_shared:$dst, - LLVM_i8Ptr_global:$src, + Arguments<(ins LLVM_PointerShared:$dst, + LLVM_PointerGlobal:$src, I32Attr:$size, LoadCacheModifierAttr:$modifier, Optional:$cpSize)> { @@ -1187,7 +1185,7 @@ def NVVM_WMMAMmaOp : NVVM_Op<"wmma.mma">, } def NVVM_StMatrixOp: NVVM_PTXBuilder_Op<"stmatrix">, - Arguments<(ins LLVM_i8Ptr_shared:$ptr, + Arguments<(ins LLVM_PointerShared:$ptr, Variadic:$sources, MMALayoutAttr:$layout)> { let summary = "cooperative matrix store"; @@ -1404,9 +1402,9 @@ def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : NVVM_Op<"cp.async.bulk.tensor.shared.cluster.global", [DeclareOpInterfaceMethods, AttrSizedOperandSegments]>, - Arguments<(ins LLVM_i64ptr_shared:$dstMem, - LLVM_i64ptr_any:$tmaDescriptor, - LLVM_i64ptr_shared:$mbar, + Arguments<(ins LLVM_PointerShared:$dstMem, + LLVM_AnyPointer:$tmaDescriptor, + LLVM_PointerShared:$mbar, Variadic:$coordinates, PtxPredicate:$predicate)> { let assemblyFormat = [{ @@ -1439,8 +1437,8 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : NVVM_Op<"cp.async.bulk.tensor.global.shared.cta", [DeclareOpInterfaceMethods, AttrSizedOperandSegments]>, - Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, - LLVM_i64ptr_shared:$srcMem, + Arguments<(ins LLVM_AnyPointer:$tmaDescriptor, + LLVM_PointerShared:$srcMem, Variadic:$coordinates, PtxPredicate:$predicate)> { let 
assemblyFormat = [{
@@ -1469,7 +1467,7 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
 def NVVM_PrefetchTensorMapOp : NVVM_Op<"prefetch.tensormap",
     [DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>,
-  Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, PtxPredicate:$predicate)> {
+  Arguments<(ins LLVM_AnyPointer:$tmaDescriptor, PtxPredicate:$predicate)> {
   let assemblyFormat = "$tmaDescriptor (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";
   let extraClassDefinition = [{
     std::string $cppClass::getPtx() {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 6c6419bf238b45..48b830ae34f292 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -275,7 +275,7 @@ def ROCDL_wmma_i32_16x16x16_iu4 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu4">;
 // raw buffer mode).
 //===---------------------------------------------------------------------===//
-def ROCDLBufferRsrc : LLVM_OpaquePointerInAddressSpace<8>;
+def ROCDLBufferRsrc : LLVM_PointerInAddressSpace<8>;
 def ROCDL_MakeBufferRsrcOp :
   ROCDL_IntrOp<"make.buffer.rsrc", [], [0], [Pure], 1>,
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 72121ad4f9e47a..99ac5cfb7b9e92 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1144,15 +1144,15 @@ def DataBoundsOp : OpenMP_Op<"bounds",
 }
 def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
-  let arguments = (ins OpenMP_PointerLikeType:$var_ptr,
-                       TypeAttr:$var_type,
+  let arguments = (ins Optional<AnyType>:$val,
+                       Optional<OpenMP_PointerLikeType>:$var_ptr,
+                       OptionalAttr<TypeAttr>:$var_type,
                        Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
                        Variadic<DataBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
                        OptionalAttr<UI64Attr>:$map_type,
                        OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
-                       DefaultValuedAttr<BoolAttr, "false">:$implicit,
                        OptionalAttr<StrAttr>:$name);
-  let results = (outs OpenMP_PointerLikeType:$omp_ptr);
+  let results = (outs AnyType:$omp_ptr);
   let description = [{
     The MapInfoOp captures information relating to individual OpenMP map clauses
@@ -1178,22 +1178,20 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
     ```
     =>
     ```mlir
-    omp.map_info var_ptr(%index_ssa) map_type(to) map_capture_type(ByRef) implicit(false)
+    omp.map_info var_ptr(%index_ssa) map_type(to) map_capture_type(ByRef) name(index)
     ```
     Description of arguments:
+    - `val`: The value to copy.
     - `var_ptr`: The address of the variable to copy.
-    - `var_type`: The type of the variable to copy.
+    - `var_type`: The type of the variable/value to copy.
     - `var_ptr_ptr`: Used when the variable copied is a member of a class,
       structure or derived type and refers to the originating struct.
     - `bounds`: Used when copying slices of arrays, pointers or pointer members
       of objects (e.g. derived types or classes); indicates the bounds of the
       variable to be copied. When it is an array slice, the bounds are in rank
       order, where rank 0 is the innermost dimension.
-    - `implicit`: indicates whether the map item has been specified explicitly
-      in a map clause or captured implicitly by being used in a target region
-      with no map or other data mapping construct.
     - `map_clauses`: OpenMP map type for this map capture, for example: from,
       to and always. It's a bitfield composed of the OpenMP runtime flags stored
       in OpenMPOffloadMappingFlags.
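Given the either/or capture encoding documented above, a consumer of this op can be sketched as follows. This is a hypothetical helper, not code from the patch; the `getVal`/`getVarPtr` accessors are assumed to follow the usual names TableGen derives from the `$val` and `$var_ptr` arguments:

    // Hypothetical helper: pick the mapped base for a map_info operand.
    #include "mlir/Dialect/OpenMP/OpenMPDialect.h"

    static mlir::Value getMappedBase(mlir::omp::MapInfoOp mapOp) {
      // A map_info now carries either a first-class value (by-copy capture)
      // or the address of a variable (by-ref capture), never both.
      if (mlir::Value val = mapOp.getVal())
        return val;
      return mapOp.getVarPtr();
    }
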
@@ -1203,9 +1201,10 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
   }];
   let assemblyFormat = [{
-    `var_ptr` `(` $var_ptr `:` type($var_ptr) `,` $var_type`)`
     oilist(
-        `var_ptr_ptr` `(` $var_ptr_ptr `:` type($var_ptr_ptr) `)`
+        `val` `(` $val `:` type($val) `)`
+      | `var_ptr` `(` $var_ptr `:` type($var_ptr) `,` $var_type `)`
+      | `var_ptr_ptr` `(` $var_ptr_ptr `:` type($var_ptr_ptr) `)`
       | `map_clauses` `(` custom<MapClause>($map_type) `)`
       | `capture` `(` custom<CaptureType>($map_capture_type) `)`
       | `bounds` `(` $bounds `)`
@@ -1265,7 +1264,7 @@ def Target_DataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments]>{
                        Optional<AnyInteger>:$device,
                        Variadic<OpenMP_PointerLikeType>:$use_device_ptr,
                        Variadic<OpenMP_PointerLikeType>:$use_device_addr,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
   let regions = (region AnyRegion:$region);
@@ -1314,7 +1313,7 @@ def Target_EnterDataOp: OpenMP_Op<"target_enter_data",
   let arguments = (ins Optional<I1>:$if_expr,
                        Optional<AnyInteger>:$device,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
   let assemblyFormat = [{
     oilist(`if` `(` $if_expr `:` type($if_expr) `)`
@@ -1360,7 +1359,7 @@ def Target_ExitDataOp: OpenMP_Op<"target_exit_data",
   let arguments = (ins Optional<I1>:$if_expr,
                        Optional<AnyInteger>:$device,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
   let assemblyFormat = [{
     oilist(`if` `(` $if_expr `:` type($if_expr) `)`
@@ -1377,7 +1376,7 @@ def Target_ExitDataOp: OpenMP_Op<"target_exit_data",
 // 2.14.5 target construct
 //===----------------------------------------------------------------------===//
-def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> {
+def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> {
   let summary = "target construct";
   let description = [{
     The target construct includes a region of code which is to be executed
@@ -1403,7 +1402,7 @@ def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperan
                        Optional<AnyInteger>:$device,
                        Optional<AnyInteger>:$thread_limit,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
   let regions = (region AnyRegion:$region);
@@ -1412,7 +1411,7 @@ def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperan
     | `device` `(` $device `:` type($device) `)`
     | `thread_limit` `(` $thread_limit `:` type($thread_limit) `)`
     | `nowait` $nowait
-    | `map_entries` `(` $map_operands `:` type($map_operands) `)`
+    | `map_entries` `(` custom<MapEntries>($map_operands, type($map_operands)) `)`
     ) $region attr-dict
   }];
diff --git a/mlir/lib/Bindings/Python/IRTypes.cpp b/mlir/lib/Bindings/Python/IRTypes.cpp
index a7ccfbea542f5c..483db673f989e6 100644
--- a/mlir/lib/Bindings/Python/IRTypes.cpp
+++ b/mlir/lib/Bindings/Python/IRTypes.cpp
@@ -462,19 +462,62 @@ class PyVectorType : public PyConcreteType<PyVectorType, PyShapedType> {
   using PyConcreteType::PyConcreteType;
   static void bindDerived(ClassTy &c) {
-    c.def_static(
-        "get",
-        [](std::vector<int64_t> shape, PyType &elementType,
-           DefaultingPyLocation loc) {
-          PyMlirContext::ErrorCapture errors(loc->getContext());
-          MlirType t = mlirVectorTypeGetChecked(loc, shape.size(), shape.data(),
-                                                elementType);
-          if (mlirTypeIsNull(t))
-            throw MLIRError("Invalid type", errors.take());
-          return PyVectorType(elementType.getContext(), t);
-        },
-        py::arg("shape"), py::arg("elementType"), py::arg("loc") = py::none(),
-        "Create a vector type");
+    c.def_static("get", &PyVectorType::get, py::arg("shape"),
+                 py::arg("elementType"), py::kw_only(),
+                 py::arg("scalable") = py::none(),
+                 py::arg("scalable_dims") = py::none(),
+                 py::arg("loc") = py::none(), "Create a vector type")
+        .def_property_readonly(
+            "scalable",
+            [](MlirType self) { return mlirVectorTypeIsScalable(self); })
+        .def_property_readonly("scalable_dims", [](MlirType self) {
+          std::vector<bool> scalableDims;
+          size_t rank = static_cast<size_t>(mlirShapedTypeGetRank(self));
+          scalableDims.reserve(rank);
+          for (size_t i = 0; i < rank; ++i)
+            scalableDims.push_back(mlirVectorTypeIsDimScalable(self, i));
+          return scalableDims;
+        });
+  }
+
+private:
+  static PyVectorType get(std::vector<int64_t> shape, PyType &elementType,
+                          std::optional<py::list> scalable,
+                          std::optional<std::vector<int64_t>> scalableDims,
+                          DefaultingPyLocation loc) {
+    if (scalable && scalableDims) {
+      throw py::value_error("'scalable' and 'scalable_dims' kwargs "
+                            "are mutually exclusive.");
+    }
+
+    PyMlirContext::ErrorCapture errors(loc->getContext());
+    MlirType type;
+    if (scalable) {
+      if (scalable->size() != shape.size())
+        throw py::value_error("Expected len(scalable) == len(shape).");
+
+      SmallVector<bool> scalableDimFlags = llvm::to_vector(llvm::map_range(
+          *scalable, [](const py::handle &h) { return h.cast<bool>(); }));
+      type = mlirVectorTypeGetScalableChecked(loc, shape.size(), shape.data(),
+                                              scalableDimFlags.data(),
+                                              elementType);
+    } else if (scalableDims) {
+      SmallVector<bool> scalableDimFlags(shape.size(), false);
+      for (int64_t dim : *scalableDims) {
+        if (static_cast<size_t>(dim) >= scalableDimFlags.size() || dim < 0)
+          throw py::value_error("Scalable dimension index out of bounds.");
+        scalableDimFlags[dim] = true;
+      }
+      type = mlirVectorTypeGetScalableChecked(loc, shape.size(), shape.data(),
+                                              scalableDimFlags.data(),
+                                              elementType);
+    } else {
+      type = mlirVectorTypeGetChecked(loc, shape.size(), shape.data(),
+                                      elementType);
+    }
+    if (mlirTypeIsNull(type))
+      throw MLIRError("Invalid type", errors.take());
+    return PyVectorType(elementType.getContext(), type);
+  }
 };
diff --git a/mlir/lib/CAPI/IR/BuiltinTypes.cpp b/mlir/lib/CAPI/IR/BuiltinTypes.cpp
index 50266b4b523323..6e645188dac861 100644
--- a/mlir/lib/CAPI/IR/BuiltinTypes.cpp
+++ b/mlir/lib/CAPI/IR/BuiltinTypes.cpp
@@ -281,6 +281,31 @@ MlirType mlirVectorTypeGetChecked(MlirLocation loc, intptr_t rank,
                                   unwrap(elementType)));
 }
+MlirType mlirVectorTypeGetScalable(intptr_t rank, const int64_t *shape,
+                                   const bool *scalable, MlirType elementType) {
+  return wrap(VectorType::get(
+      llvm::ArrayRef(shape, static_cast<size_t>(rank)), unwrap(elementType),
+      llvm::ArrayRef(scalable, static_cast<size_t>(rank))));
+}
+
+MlirType mlirVectorTypeGetScalableChecked(MlirLocation loc, intptr_t rank,
+                                          const int64_t *shape,
+                                          const bool *scalable,
+                                          MlirType elementType) {
+  return wrap(VectorType::getChecked(
+      unwrap(loc), llvm::ArrayRef(shape, static_cast<size_t>(rank)),
+      unwrap(elementType),
+      llvm::ArrayRef(scalable, static_cast<size_t>(rank))));
+}
+
+bool mlirVectorTypeIsScalable(MlirType type) {
+  return unwrap(type).cast<VectorType>().isScalable();
+}
+
+bool mlirVectorTypeIsDimScalable(MlirType type, intptr_t dim) {
+  return unwrap(type).cast<VectorType>().getScalableDims()[dim];
+}
+
 //===----------------------------------------------------------------------===//
 // Ranked / Unranked tensor type.
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index 35b95d7a5ebe92..04496d6b8f6344 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -76,16 +76,6 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, : std::nullopt; }); - // LLVM container types may (recursively) contain other types that must be - // converted even when the outer type is compatible. - addConversion([&](LLVM::LLVMPointerType type) -> std::optional { - if (type.isOpaque()) - return type; - if (auto pointee = convertType(type.getElementType())) - return LLVM::LLVMPointerType::get(pointee, type.getAddressSpace()); - return std::nullopt; - }); - addConversion([&](LLVM::LLVMStructType type, SmallVectorImpl &results) -> std::optional { // Fastpath for types that won't be converted by this callback anyway. diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7f5681e7bdc059..c22cff4c1328a5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -216,17 +216,13 @@ OpFoldResult ICmpOp::fold(FoldAdaptor adaptor) { //===----------------------------------------------------------------------===// void AllocaOp::print(OpAsmPrinter &p) { - Type elemTy = llvm::cast(getType()).getElementType(); - if (!elemTy) - elemTy = *getElemType(); - auto funcTy = FunctionType::get(getContext(), {getArraySize().getType()}, {getType()}); if (getInalloca()) p << " inalloca"; - p << ' ' << getArraySize() << " x " << elemTy; + p << ' ' << getArraySize() << " x " << getElemType(); if (getAlignment() && *getAlignment() != 0) p.printOptionalAttrDict((*this)->getAttrs(), {kElemTypeAttrName, getInallocaAttrName()}); @@ -277,40 +273,16 @@ ParseResult AllocaOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); Type resultType = funcType.getResult(0); - if (auto ptrResultType = llvm::dyn_cast(resultType)) { - if (ptrResultType.isOpaque()) - result.addAttribute(kElemTypeAttrName, TypeAttr::get(elemType)); - } + if (auto ptrResultType = llvm::dyn_cast(resultType)) + result.addAttribute(kElemTypeAttrName, TypeAttr::get(elemType)); result.addTypes({funcType.getResult(0)}); return success(); } -/// Checks that the elemental type is present in either the pointer type or -/// the attribute, but not both. -static LogicalResult verifyOpaquePtr(Operation *op, LLVMPointerType ptrType, - std::optional ptrElementType) { - if (ptrType.isOpaque() && !ptrElementType.has_value()) { - return op->emitOpError() << "expected '" << kElemTypeAttrName - << "' attribute if opaque pointer type is used"; - } - if (!ptrType.isOpaque() && ptrElementType.has_value()) { - return op->emitOpError() - << "unexpected '" << kElemTypeAttrName - << "' attribute when non-opaque pointer type is used"; - } - return success(); -} - LogicalResult AllocaOp::verify() { - LLVMPointerType ptrType = llvm::cast(getType()); - if (failed(verifyOpaquePtr(getOperation(), ptrType, getElemType()))) - return failure(); - - Type elemTy = - (ptrType.isOpaque()) ? *getElemType() : ptrType.getElementType(); // Only certain target extension types can be used in 'alloca'. 
- if (auto targetExtType = dyn_cast(elemTy); + if (auto targetExtType = dyn_cast(getElemType()); targetExtType && !targetExtType.supportsMemOps()) return emitOpError() << "this target extension type cannot be used in alloca"; @@ -318,11 +290,7 @@ LogicalResult AllocaOp::verify() { return success(); } -Type AllocaOp::getResultPtrElementType() { - // This will become trivial once non-opaque pointers are gone. - return getElemType().has_value() ? *getElemType() - : getResult().getType().getElementType(); -} +Type AllocaOp::getResultPtrElementType() { return getElemType(); } //===----------------------------------------------------------------------===// // LLVM::BrOp @@ -525,18 +493,6 @@ static Type extractVectorElementType(Type type) { return type; } -void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType, - Value basePtr, ArrayRef indices, bool inbounds, - ArrayRef attributes) { - auto ptrType = - llvm::cast(extractVectorElementType(basePtr.getType())); - assert(!ptrType.isOpaque() && - "expected non-opaque pointer, provide elementType explicitly when " - "opaque pointers are used"); - build(builder, result, resultType, ptrType.getElementType(), basePtr, indices, - inbounds, attributes); -} - /// Destructures the 'indices' parameter into 'rawConstantIndices' and /// 'dynamicIndices', encoding the former in the process. In the process, /// dynamic indices which are used to index into a structure type are converted @@ -602,20 +558,11 @@ void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType, result.addAttribute(getInboundsAttrName(result.name), builder.getUnitAttr()); } - if (llvm::cast(extractVectorElementType(basePtr.getType())) - .isOpaque()) - result.addAttribute(kElemTypeAttrName, TypeAttr::get(elementType)); + result.addAttribute(kElemTypeAttrName, TypeAttr::get(elementType)); result.addOperands(basePtr); result.addOperands(dynamicIndices); } -void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType, - Value basePtr, ValueRange indices, bool inbounds, - ArrayRef attributes) { - build(builder, result, resultType, basePtr, SmallVector(indices), - inbounds, attributes); -} - void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType, Type elementType, Value basePtr, ValueRange indices, bool inbounds, ArrayRef attributes) { @@ -712,35 +659,20 @@ verifyStructIndices(Type baseGEPType, GEPIndicesAdaptor indices, } LogicalResult LLVM::GEPOp::verify() { - if (failed(verifyOpaquePtr( - getOperation(), - llvm::cast(extractVectorElementType(getType())), - getElemType()))) - return failure(); - if (static_cast( llvm::count(getRawConstantIndices(), kDynamicIndex)) != getDynamicIndices().size()) return emitOpError("expected as many dynamic indices as specified in '") << getRawConstantIndicesAttrName().getValue() << "'"; - return verifyStructIndices(getSourceElementType(), getIndices(), + return verifyStructIndices(getElemType(), getIndices(), [&] { return emitOpError(); }); } -Type LLVM::GEPOp::getSourceElementType() { - if (std::optional elemType = getElemType()) - return *elemType; - - return llvm::cast( - extractVectorElementType(getBase().getType())) - .getElementType(); -} - Type GEPOp::getResultPtrElementType() { // Set the initial type currently being used for indexing. This will be // updated as the indices get walked over. - Type selectedType = getSourceElementType(); + Type selectedType = getElemType(); // Follow the indexed elements in the gep. 
auto indices = getIndices(); @@ -836,14 +768,6 @@ LogicalResult LoadOp::verify() { {AtomicOrdering::release, AtomicOrdering::acq_rel}); } -void LoadOp::build(OpBuilder &builder, OperationState &state, Value addr, - unsigned alignment, bool isVolatile, bool isNonTemporal) { - auto type = llvm::cast(addr.getType()).getElementType(); - assert(type && "must provide explicit element type to the constructor " - "when the pointer type is opaque"); - build(builder, state, type, addr, alignment, isVolatile, isNonTemporal); -} - void LoadOp::build(OpBuilder &builder, OperationState &state, Type type, Value addr, unsigned alignment, bool isVolatile, bool isNonTemporal, AtomicOrdering ordering, @@ -857,51 +781,6 @@ void LoadOp::build(OpBuilder &builder, OperationState &state, Type type, /*tbaa=*/nullptr); } -// Extract the pointee type from the LLVM pointer type wrapped in MLIR. Return -// the resulting type if any, null type if opaque pointers are used, and -// std::nullopt if the given type is not the pointer type. -static std::optional -getLoadStoreElementType(OpAsmParser &parser, Type type, SMLoc trailingTypeLoc) { - auto llvmTy = llvm::dyn_cast(type); - if (!llvmTy) { - parser.emitError(trailingTypeLoc, "expected LLVM pointer type"); - return std::nullopt; - } - return llvmTy.getElementType(); -} - -/// Parses the LoadOp type either using the typed or opaque pointer format. -// TODO: Drop once the typed pointer assembly format is not needed anymore. -static ParseResult parseLoadType(OpAsmParser &parser, Type &type, - Type &elementType) { - SMLoc trailingTypeLoc; - if (parser.getCurrentLocation(&trailingTypeLoc) || parser.parseType(type)) - return failure(); - - std::optional pointerElementType = - getLoadStoreElementType(parser, type, trailingTypeLoc); - if (!pointerElementType) - return failure(); - if (*pointerElementType) { - elementType = *pointerElementType; - return success(); - } - - if (parser.parseArrow() || parser.parseType(elementType)) - return failure(); - return success(); -} - -/// Prints the LoadOp type either using the typed or opaque pointer format. -// TODO: Drop once the typed pointer assembly format is not needed anymore. -static void printLoadType(OpAsmPrinter &printer, Operation *op, Type type, - Type elementType) { - printer << type; - auto pointerType = cast(type); - if (pointerType.isOpaque()) - printer << " -> " << elementType; -} - //===----------------------------------------------------------------------===// // StoreOp //===----------------------------------------------------------------------===// @@ -940,38 +819,6 @@ void StoreOp::build(OpBuilder &builder, OperationState &state, Value value, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } -/// Parses the StoreOp type either using the typed or opaque pointer format. -// TODO: Drop once the typed pointer assembly format is not needed anymore. -static ParseResult parseStoreType(OpAsmParser &parser, Type &elementType, - Type &type) { - SMLoc trailingTypeLoc; - if (parser.getCurrentLocation(&trailingTypeLoc) || - parser.parseType(elementType)) - return failure(); - - if (succeeded(parser.parseOptionalComma())) - return parser.parseType(type); - - // Extract the element type from the pointer type. 
- type = elementType; - std::optional pointerElementType = - getLoadStoreElementType(parser, type, trailingTypeLoc); - if (!pointerElementType) - return failure(); - elementType = *pointerElementType; - return success(); -} - -/// Prints the StoreOp type either using the typed or opaque pointer format. -// TODO: Drop once the typed pointer assembly format is not needed anymore. -static void printStoreType(OpAsmPrinter &printer, Operation *op, - Type elementType, Type type) { - auto pointerType = cast(type); - if (pointerType.isOpaque()) - printer << elementType << ", "; - printer << type; -} - //===----------------------------------------------------------------------===// // CallOp //===----------------------------------------------------------------------===// @@ -1055,22 +902,6 @@ void CallOp::build(OpBuilder &builder, OperationState &state, LLVMFuncOp func, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } - -void CallOp::build(OpBuilder &builder, OperationState &state, Value callee, - ValueRange args) { - auto calleeType = cast( - cast(callee.getType()).getElementType()); - SmallVector operands; - operands.reserve(1 + args.size()); - operands.push_back(callee); - llvm::append_range(operands, args); - return build(builder, state, getCallOpResultTypes(calleeType), - TypeAttr::get(calleeType), FlatSymbolRefAttr(), operands, - /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr, - /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, - /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); -} - CallInterfaceCallable CallOp::getCallableForCallee() { // Direct call. if (FlatSymbolRefAttr calleeAttr = getCalleeAttr()) @@ -1145,10 +976,7 @@ LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { return emitOpError("indirect call expects a pointer as callee: ") << getOperand(0).getType(); - if (ptrType.isOpaque()) - return success(); - - fnType = ptrType.getElementType(); + return success(); } else { Operation *callee = symbolTable.lookupNearestSymbolFrom(*this, calleeName.getAttr()); @@ -1848,17 +1676,6 @@ AddressOfOp::verifySymbolUses(SymbolTableCollection &symbolTable) { return emitOpError("pointer address space must match address space of the " "referenced global"); - if (type.isOpaque()) - return success(); - - if (global && type.getElementType() != global.getType()) - return emitOpError( - "the type must be a pointer to the type of the referenced global"); - - if (function && type.getElementType() != function.getFunctionType()) - return emitOpError( - "the type must be a pointer to the type of the referenced function"); - return success(); } @@ -2135,9 +1952,13 @@ static bool isZeroAttribute(Attribute value) { } LogicalResult GlobalOp::verify() { - if (!LLVMPointerType::isValidElementType(getType())) + bool validType = isCompatibleOuterType(getType()) + ? 
!llvm::isa(getType()) + : llvm::isa(getType()); + if (!validType) return emitOpError( - "expects type to be a valid element type for an LLVM pointer"); + "expects type to be a valid element type for an LLVM global"); if ((*this)->getParentOp() && !satisfiesLLVMModule((*this)->getParentOp())) return emitOpError("must appear at the module level"); @@ -2733,11 +2554,7 @@ void AtomicRMWOp::build(OpBuilder &builder, OperationState &state, } LogicalResult AtomicRMWOp::verify() { - auto ptrType = llvm::cast(getPtr().getType()); auto valType = getVal().getType(); - if (!ptrType.isOpaque() && valType != ptrType.getElementType()) - return emitOpError("expected LLVM IR element type for operand #0 to " - "match type for operand #1"); if (getBinOp() == AtomicBinOp::fadd || getBinOp() == AtomicBinOp::fsub || getBinOp() == AtomicBinOp::fmin || getBinOp() == AtomicBinOp::fmax) { if (!mlir::LLVM::isCompatibleFloatingPointType(valType)) @@ -2790,9 +2607,6 @@ LogicalResult AtomicCmpXchgOp::verify() { if (!ptrType) return emitOpError("expected LLVM IR pointer type for operand #0"); auto valType = getVal().getType(); - if (!ptrType.isOpaque() && valType != ptrType.getElementType()) - return emitOpError("expected LLVM IR element type for operand #0 to " - "match type for all other operands"); if (!isTypeCompatibleWithAtomicOp(valType, /*isPointerTypeAllowed=*/true)) return emitOpError("unexpected LLVM IR type"); @@ -2987,7 +2801,7 @@ OpFoldResult LLVM::GEPOp::fold(FoldAdaptor adaptor) { if (changed) { SmallVector rawConstantIndices; SmallVector dynamicIndices; - destructureIndices(getSourceElementType(), gepArgs, rawConstantIndices, + destructureIndices(getElemType(), gepArgs, rawConstantIndices, dynamicIndices); getDynamicIndicesMutable().assign(dynamicIndices); @@ -3185,14 +2999,7 @@ LogicalResult LLVMDialect::verifyParameterAttribute(Operation *op, auto checkPointerTypeMatches = [&]() -> LogicalResult { if (failed(checkPointerType())) return failure(); - auto ptrType = llvm::cast(paramType); - auto typeAttr = llvm::cast(paramAttr.getValue()); - if (!ptrType.isOpaque() && ptrType.getElementType() != typeAttr.getValue()) - return op->emitError() - << name - << " attribute attached to LLVM pointer argument of " - "different type"; return success(); }; @@ -3317,8 +3124,7 @@ Operation *LLVMDialect::materializeConstant(OpBuilder &builder, Attribute value, Value mlir::LLVM::createGlobalString(Location loc, OpBuilder &builder, StringRef name, StringRef value, - LLVM::Linkage linkage, - bool useOpaquePointers) { + LLVM::Linkage linkage) { assert(builder.getInsertionBlock() && builder.getInsertionBlock()->getParentOp() && "expected builder to point to a block constrained in an op"); @@ -3334,19 +3140,11 @@ Value mlir::LLVM::createGlobalString(Location loc, OpBuilder &builder, loc, type, /*isConstant=*/true, linkage, name, builder.getStringAttr(value), /*alignment=*/0); - LLVMPointerType resultType; - LLVMPointerType charPtr; - if (!useOpaquePointers) { - resultType = LLVMPointerType::get(type); - charPtr = LLVMPointerType::get(IntegerType::get(ctx, 8)); - } else { - resultType = charPtr = LLVMPointerType::get(ctx); - } - + LLVMPointerType ptrType = LLVMPointerType::get(ctx); // Get the pointer to the first character in the global string. 
- Value globalPtr = builder.create(loc, resultType, - global.getSymNameAttr()); - return builder.create(loc, charPtr, type, globalPtr, + Value globalPtr = + builder.create(loc, ptrType, global.getSymNameAttr()); + return builder.create(loc, ptrType, type, globalPtr, ArrayRef{0, 0}); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 2cdc5d9fff84e7..cc2a917e58f639 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -32,7 +32,7 @@ llvm::SmallVector LLVM::AllocaOp::getPromotableSlots() { if (!getOperation()->getBlock()->isEntryBlock()) return {}; - return {MemorySlot{getResult(), getResultPtrElementType()}}; + return {MemorySlot{getResult(), getElemType()}}; } Value LLVM::AllocaOp::getDefaultValue(const MemorySlot &slot, @@ -61,8 +61,7 @@ SmallVector LLVM::AllocaOp::getDestructurableSlots() { if (!mlir::matchPattern(getArraySize(), m_One())) return {}; - Type elemType = getResultPtrElementType(); - auto destructurable = dyn_cast(elemType); + auto destructurable = dyn_cast(getElemType()); if (!destructurable) return {}; @@ -75,7 +74,8 @@ SmallVector LLVM::AllocaOp::getDestructurableSlots() { for (Attribute index : llvm::make_first_range(destructuredType.value())) allocaTypeMap.insert({index, LLVM::LLVMPointerType::get(getContext())}); - return {DestructurableMemorySlot{{getResult(), elemType}, {allocaTypeMap}}}; + return { + DestructurableMemorySlot{{getResult(), getElemType()}, {allocaTypeMap}}}; } DenseMap @@ -83,12 +83,9 @@ LLVM::AllocaOp::destructure(const DestructurableMemorySlot &slot, const SmallPtrSetImpl &usedIndices, RewriterBase &rewriter) { assert(slot.ptr == getResult()); - Type elemType = - getElemType() ? *getElemType() : getResult().getType().getElementType(); - rewriter.setInsertionPointAfter(*this); - auto destructurableType = cast(elemType); + auto destructurableType = cast(getElemType()); DenseMap slotMap; for (Attribute index : usedIndices) { Type elemType = destructurableType.getTypeAtIndex(index); @@ -337,11 +334,6 @@ bool LLVM::GEPOp::canRewire(const DestructurableMemorySlot &slot, if (!basePtrType) return false; - // Typed pointers are not supported. This should be removed once typed - // pointers are removed from the LLVM dialect. 
- if (!basePtrType.isOpaque()) - return false; - if (getBase() != slot.ptr || slot.elemType != getElemType()) return false; if (!isFirstIndexZero(*this)) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index bc8300a8b7329e..8841aa8362569a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -75,40 +75,6 @@ static void printFunctionTypes(AsmPrinter &p, ArrayRef params, p << ')'; } -//===----------------------------------------------------------------------===// -// custom -//===----------------------------------------------------------------------===// - -static ParseResult parsePointer(AsmParser &p, Type &elementType, - unsigned &addressSpace) { - // `<` addressSpace `>` - OptionalParseResult result = p.parseOptionalInteger(addressSpace); - if (result.has_value()) { - if (failed(result.value())) - return failure(); - elementType = Type(); - return success(); - } - - if (parsePrettyLLVMType(p, elementType)) - return failure(); - if (succeeded(p.parseOptionalComma())) - return p.parseInteger(addressSpace); - - return success(); -} - -static void printPointer(AsmPrinter &p, Type elementType, - unsigned addressSpace) { - if (elementType) - printPrettyLLVMType(p, elementType); - if (addressSpace != 0) { - if (elementType) - p << ", "; - p << addressSpace; - } -} - //===----------------------------------------------------------------------===// // custom //===----------------------------------------------------------------------===// @@ -285,33 +251,6 @@ LLVMFunctionType::verify(function_ref emitError, return success(); } -//===----------------------------------------------------------------------===// -// LLVMPointerType -//===----------------------------------------------------------------------===// - -bool LLVMPointerType::isValidElementType(Type type) { - if (!type) - return true; - return isCompatibleOuterType(type) - ? !llvm::isa(type) - : llvm::isa(type); -} - -LLVMPointerType LLVMPointerType::get(Type pointee, unsigned addressSpace) { - assert(pointee && "expected non-null subtype, pass the context instead if " - "the opaque pointer type is desired"); - return Base::get(pointee.getContext(), pointee, addressSpace); -} - -LogicalResult -LLVMPointerType::verify(function_ref emitError, - Type pointee, unsigned) { - if (!isValidElementType(pointee)) - return emitError() << "invalid pointer element type: " << pointee; - return success(); -} - //===----------------------------------------------------------------------===// // DataLayoutTypeInterface @@ -369,9 +308,7 @@ LLVMPointerType::getTypeSizeInBits(const DataLayout &dataLayout, // For other memory spaces, use the size of the pointer to the default memory // space. 
- if (isOpaque()) - return dataLayout.getTypeSizeInBits(get(getContext())); - return dataLayout.getTypeSizeInBits(get(getElementType())); + return dataLayout.getTypeSizeInBits(get(getContext())); } unsigned LLVMPointerType::getABIAlignment(const DataLayout &dataLayout, @@ -380,9 +317,7 @@ unsigned LLVMPointerType::getABIAlignment(const DataLayout &dataLayout, getPointerDataLayoutEntry(params, *this, PtrDLEntryPos::Abi)) return *alignment; - if (isOpaque()) - return dataLayout.getTypeABIAlignment(get(getContext())); - return dataLayout.getTypeABIAlignment(get(getElementType())); + return dataLayout.getTypeABIAlignment(get(getContext())); } unsigned @@ -392,9 +327,7 @@ LLVMPointerType::getPreferredAlignment(const DataLayout &dataLayout, getPointerDataLayoutEntry(params, *this, PtrDLEntryPos::Preferred)) return *alignment; - if (isOpaque()) - return dataLayout.getTypePreferredAlignment(get(getContext())); - return dataLayout.getTypePreferredAlignment(get(getElementType())); + return dataLayout.getTypePreferredAlignment(get(getContext())); } bool LLVMPointerType::areCompatible(DataLayoutEntryListRef oldLayout, @@ -440,7 +373,6 @@ LogicalResult LLVMPointerType::verifyEntries(DataLayoutEntryListRef entries, for (DataLayoutEntryInterface entry : entries) { if (!entry.isTypeEntry()) continue; - auto key = llvm::cast(entry.getKey().get()); auto values = llvm::dyn_cast(entry.getValue()); if (!values || (values.size() != 3 && values.size() != 4)) { return emitError(loc) @@ -448,10 +380,6 @@ LogicalResult LLVMPointerType::verifyEntries(DataLayoutEntryListRef entries, << " to be a dense integer elements attribute with 3 or 4 " "elements"; } - if (key.getElementType() && !key.getElementType().isInteger(8)) { - return emitError(loc) << "unexpected layout attribute for pointer to " - << key.getElementType(); - } if (extractPointerSpecValue(values, PtrDLEntryPos::Abi) > extractPointerSpecValue(values, PtrDLEntryPos::Preferred)) { return emitError(loc) << "preferred alignment is expected to be at least " @@ -869,11 +797,7 @@ static bool isCompatibleImpl(Type type, DenseSet &compatibleTypes) { return vecType.getRank() == 1 && isCompatible(vecType.getElementType()); }) - .Case([&](auto pointerType) { - if (pointerType.isOpaque()) - return true; - return isCompatible(pointerType.getElementType()); - }) + .Case([&](auto pointerType) { return true; }) .Case([&](auto extType) { return llvm::all_of(extType.getTypeParams(), isCompatible); }) diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp index 9731689e551762..b094c650ff1932 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp @@ -92,10 +92,6 @@ LogicalResult AddFieldGetterToStructDirectUse::matchAndRewrite( LoadOp load, PatternRewriter &rewriter) const { PatternRewriter::InsertionGuard guard(rewriter); - // Load from typed pointers are not supported. - if (!load.getAddr().getType().isOpaque()) - return failure(); - Type inconsistentElementType = isElementTypeInconsistent(load.getAddr(), load.getType()); if (!inconsistentElementType) @@ -129,10 +125,6 @@ LogicalResult AddFieldGetterToStructDirectUse::matchAndRewrite( StoreOp store, PatternRewriter &rewriter) const { PatternRewriter::InsertionGuard guard(rewriter); - // Store to typed pointers are not supported. 
- if (!store.getAddr().getType().isOpaque()) - return failure(); - Type inconsistentElementType = isElementTypeInconsistent(store.getAddr(), store.getValue().getType()); if (!inconsistentElementType) @@ -172,9 +164,9 @@ static std::optional<uint64_t> gepToByteOffset(DataLayout &layout, GEPOp gep) { indices.push_back(indexInt.getInt()); } - uint64_t offset = indices[0] * layout.getTypeSize(gep.getSourceElementType()); + uint64_t offset = indices[0] * layout.getTypeSize(gep.getElemType()); - Type currentType = gep.getSourceElementType(); + Type currentType = gep.getElemType(); for (uint32_t index : llvm::drop_begin(indices)) { bool shouldCancel = TypeSwitch<Type, bool>(currentType) @@ -579,7 +571,7 @@ LogicalResult SplitStores::matchAndRewrite(StoreOp store, return failure(); offset = *byteOffset; - typeHint = gepOp.getSourceElementType(); + typeHint = gepOp.getElemType(); address = gepOp.getBase(); } } @@ -661,8 +653,7 @@ LogicalResult SplitGEP::matchAndRewrite(GEPOp gepOp, // Split off the first GEP using the first two indices. auto subGepOp = rewriter.create<GEPOp>( - gepOp.getLoc(), gepOp.getType(), gepOp.getSourceElementType(), - gepOp.getBase(), + gepOp.getLoc(), gepOp.getType(), gepOp.getElemType(), gepOp.getBase(), llvm::map_to_vector(llvm::make_range(indices.begin(), splitIter), indexToGEPArg), gepOp.getInbounds()); diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp index 93ec2bcdf58fa4..be301c191d5139 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp @@ -187,14 +187,22 @@ DeletionKind memref::LoadOp::removeBlockingUses( return DeletionKind::Delete; } -/// Returns the index of a memref in attribute form, given its indices. +/// Returns the index of a memref in attribute form, given its indices. Returns +/// a null attribute if it cannot be proven that the indices form a valid +/// in-bounds index for the provided MemRefType. The indices must come from a +/// valid memref StoreOp or LoadOp. static Attribute getAttributeIndexFromIndexOperands(MLIRContext *ctx, - ValueRange indices) { + ValueRange indices, + MemRefType memrefType) { SmallVector<Attribute> index; - for (Value coord : indices) { + for (auto [coord, dimSize] : llvm::zip(indices, memrefType.getShape())) { IntegerAttr coordAttr; if (!matchPattern(coord, m_Constant(&coordAttr))) return {}; + // MemRefType shape dimensions are always positive (checked by verifier).
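+ // A constant index that is negative, does not fit in 64 bits, or is not + // smaller than the dimension extent can never be in bounds, so refuse to + // destructure the access.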
+ std::optional<uint64_t> coordInt = coordAttr.getValue().tryZExtValue(); + if (!coordInt || coordInt.value() >= static_cast<uint64_t>(dimSize)) + return {}; index.push_back(coordAttr); } return ArrayAttr::get(ctx, index); } @@ -205,8 +213,8 @@ bool memref::LoadOp::canRewire(const DestructurableMemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) { if (slot.ptr != getMemRef()) return false; - Attribute index = - getAttributeIndexFromIndexOperands(getContext(), getIndices()); + Attribute index = getAttributeIndexFromIndexOperands( + getContext(), getIndices(), getMemRefType()); if (!index) return false; usedIndices.insert(index); @@ -216,8 +224,8 @@ bool memref::LoadOp::canRewire(const DestructurableMemorySlot &slot, DeletionKind memref::LoadOp::rewire(const DestructurableMemorySlot &slot, DenseMap<Attribute, MemorySlot> &subslots, RewriterBase &rewriter) { - Attribute index = - getAttributeIndexFromIndexOperands(getContext(), getIndices()); + Attribute index = getAttributeIndexFromIndexOperands( + getContext(), getIndices(), getMemRefType()); const MemorySlot &memorySlot = subslots.at(index); rewriter.updateRootInPlace(*this, [&]() { setMemRef(memorySlot.ptr); @@ -258,8 +266,8 @@ bool memref::StoreOp::canRewire(const DestructurableMemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) { if (slot.ptr != getMemRef() || getValue() == slot.ptr) return false; - Attribute index = - getAttributeIndexFromIndexOperands(getContext(), getIndices()); + Attribute index = getAttributeIndexFromIndexOperands( + getContext(), getIndices(), getMemRefType()); if (!index || !slot.elementPtrs.contains(index)) return false; usedIndices.insert(index); @@ -269,8 +277,8 @@ bool memref::StoreOp::canRewire(const DestructurableMemorySlot &slot, DeletionKind memref::StoreOp::rewire(const DestructurableMemorySlot &slot, DenseMap<Attribute, MemorySlot> &subslots, RewriterBase &rewriter) { - Attribute index = - getAttributeIndexFromIndexOperands(getContext(), getIndices()); + Attribute index = getAttributeIndexFromIndexOperands( + getContext(), getIndices(), getMemRefType()); const MemorySlot &memorySlot = subslots.at(index); rewriter.updateRootInPlace(*this, [&]() { setMemRef(memorySlot.ptr); diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index df64d561f46cb3..ff2eb9ebfc42f5 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -27,14 +27,19 @@ using namespace acc; #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.cpp.inc" namespace { -/// Model for pointer-like types that already provide a `getElementType` method. -template <typename T> -struct PointerLikeModel - : public PointerLikeType::ExternalModel<PointerLikeModel<T>, T> { +struct MemRefPointerLikeModel + : public PointerLikeType::ExternalModel<MemRefPointerLikeModel, MemRefType> { Type getElementType(Type pointer) const { - return llvm::cast<T>(pointer).getElementType(); + return llvm::cast<MemRefType>(pointer).getElementType(); } }; + +struct LLVMPointerPointerLikeModel + : public PointerLikeType::ExternalModel<LLVMPointerPointerLikeModel, LLVM::LLVMPointerType> { + Type getElementType(Type pointer) const { return Type(); } +}; } // namespace //===----------------------------------------------------------------------===// @@ -58,9 +63,9 @@ void OpenACCDialect::initialize() { // By attaching interfaces here, we make the OpenACC dialect dependent on // the other dialects. This is probably better than having dialects like LLVM // and memref be dependent on OpenACC.
- LLVM::LLVMPointerType::attachInterface< - PointerLikeModel<LLVM::LLVMPointerType>>(*getContext()); - MemRefType::attachInterface<PointerLikeModel<MemRefType>>(*getContext()); + MemRefType::attachInterface<MemRefPointerLikeModel>(*getContext()); + LLVM::LLVMPointerType::attachInterface<LLVMPointerPointerLikeModel>( + *getContext()); } //===----------------------------------------------------------------------===// @@ -1023,17 +1028,13 @@ void EnterDataOp::getCanonicalizationPatterns(RewritePatternSet &results, // AtomicReadOp //===----------------------------------------------------------------------===// -LogicalResult AtomicReadOp::verify() { - return verifyCommon(); -} +LogicalResult AtomicReadOp::verify() { return verifyCommon(); } //===----------------------------------------------------------------------===// // AtomicWriteOp //===----------------------------------------------------------------------===// -LogicalResult AtomicWriteOp::verify() { - return verifyCommon(); -} +LogicalResult AtomicWriteOp::verify() { return verifyCommon(); } //===----------------------------------------------------------------------===// // AtomicUpdateOp @@ -1054,13 +1055,9 @@ LogicalResult AtomicUpdateOp::canonicalize(AtomicUpdateOp op, return failure(); } -LogicalResult AtomicUpdateOp::verify() { - return verifyCommon(); -} +LogicalResult AtomicUpdateOp::verify() { return verifyCommon(); } -LogicalResult AtomicUpdateOp::verifyRegions() { - return verifyRegionsCommon(); -} +LogicalResult AtomicUpdateOp::verifyRegions() { return verifyRegionsCommon(); } //===----------------------------------------------------------------------===// // AtomicCaptureOp @@ -1084,9 +1081,7 @@ AtomicUpdateOp AtomicCaptureOp::getAtomicUpdateOp() { return dyn_cast<AtomicUpdateOp>(getSecondOp()); } -LogicalResult AtomicCaptureOp::verifyRegions() { - return verifyRegionsCommon(); -} +LogicalResult AtomicCaptureOp::verifyRegions() { return verifyRegionsCommon(); } //===----------------------------------------------------------------------===// // DeclareEnterOp diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3b792a26d1823f..480af0e1307c15 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -39,15 +39,20 @@ using namespace mlir; using namespace mlir::omp; namespace { -/// Model for pointer-like types that already provide a `getElementType` method.
-template <typename T> -struct PointerLikeModel - : public PointerLikeType::ExternalModel<PointerLikeModel<T>, T> { +struct MemRefPointerLikeModel + : public PointerLikeType::ExternalModel<MemRefPointerLikeModel, MemRefType> { Type getElementType(Type pointer) const { - return llvm::cast<T>(pointer).getElementType(); + return llvm::cast<MemRefType>(pointer).getElementType(); } }; +struct LLVMPointerPointerLikeModel + : public PointerLikeType::ExternalModel<LLVMPointerPointerLikeModel, LLVM::LLVMPointerType> { + Type getElementType(Type pointer) const { return Type(); } +}; + struct OpenMPDialectFoldInterface : public DialectFoldInterface { using DialectFoldInterface::DialectFoldInterface; @@ -73,11 +78,9 @@ void OpenMPDialect::initialize() { >(); addInterface<OpenMPDialectFoldInterface>(); - LLVM::LLVMPointerType::attachInterface< - PointerLikeModel<LLVM::LLVMPointerType>>(*getContext()); - MemRefType::attachInterface<PointerLikeModel<MemRefType>>(*getContext()); - LLVM::LLVMPointerType::attachInterface< - PointerLikeModel<LLVM::LLVMPointerType>>(*getContext()); + MemRefType::attachInterface<MemRefPointerLikeModel>(*getContext()); + LLVM::LLVMPointerType::attachInterface<LLVMPointerPointerLikeModel>( + *getContext()); // Attach default offload module interface to module op to access // offload functionality through @@ -691,6 +694,9 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { if (mapTypeMod == "always") mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + if (mapTypeMod == "implicit") + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + if (mapTypeMod == "close") mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; @@ -737,6 +743,9 @@ static void printMapClause(OpAsmPrinter &p, Operation *op, if (mapTypeToBitFlag(mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)) mapTypeStrs.push_back("always"); + if (mapTypeToBitFlag(mapTypeBits, + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) + mapTypeStrs.push_back("implicit"); if (mapTypeToBitFlag(mapTypeBits, llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE)) mapTypeStrs.push_back("close"); @@ -777,6 +786,64 @@ static void printMapClause(OpAsmPrinter &p, Operation *op, } } +static ParseResult +parseMapEntries(OpAsmParser &parser, + SmallVectorImpl<OpAsmParser::UnresolvedOperand> &mapOperands, + SmallVectorImpl<Type> &mapOperandTypes) { + OpAsmParser::UnresolvedOperand arg; + OpAsmParser::UnresolvedOperand blockArg; + Type argType; + auto parseEntries = [&]() -> ParseResult { + if (parser.parseOperand(arg) || parser.parseArrow() || + parser.parseOperand(blockArg)) + return failure(); + mapOperands.push_back(arg); + return success(); + }; + + auto parseTypes = [&]() -> ParseResult { + if (parser.parseType(argType)) + return failure(); + mapOperandTypes.push_back(argType); + return success(); + }; + + if (parser.parseCommaSeparatedList(parseEntries)) + return failure(); + + if (parser.parseColon()) + return failure(); + + if (parser.parseCommaSeparatedList(parseTypes)) + return failure(); + + return success(); +} + +static void printMapEntries(OpAsmPrinter &p, Operation *op, + OperandRange mapOperands, + TypeRange mapOperandTypes) { + auto &region = op->getRegion(0); + unsigned argIndex = 0; + + for (const auto &mapOp : mapOperands) { + const auto &blockArg = region.front().getArgument(argIndex); + p << mapOp << " -> " << blockArg; + argIndex++; + if (argIndex < mapOperands.size()) + p << ", "; + } + p << " : "; + + argIndex = 0; + for (const auto &mapType : mapOperandTypes) { + p << mapType; + argIndex++; + if (argIndex < mapOperands.size()) + p << ", "; + } +} + static void printCaptureType(OpAsmPrinter &p, Operation *op, VariableCaptureKindAttr mapCaptureType) { std::string typeCapStr; @@ -823,6 +890,15 @@ static LogicalResult verifyMapClause(Operation *op,
OperandRange mapOperands) { if (auto MapInfoOp = mlir::dyn_cast(mapOp.getDefiningOp())) { + if (MapInfoOp.getVal() && MapInfoOp.getVarPtr()) + emitError(op->getLoc(), "only one of val or var_ptr must be used"); + + if (!MapInfoOp.getVal() && !MapInfoOp.getVarPtr()) + emitError(op->getLoc(), "missing val or var_ptr"); + + if (!MapInfoOp.getVarPtr() && MapInfoOp.getVarType().has_value()) + emitError(op->getLoc(), "var_type supplied without var_ptr"); + if (!MapInfoOp.getMapType().has_value()) emitError(op->getLoc(), "missing map type for map operand"); @@ -1342,9 +1418,7 @@ LogicalResult AtomicUpdateOp::verify() { return verifySynchronizationHint(*this, getHintVal()); } -LogicalResult AtomicUpdateOp::verifyRegions() { - return verifyRegionsCommon(); -} +LogicalResult AtomicUpdateOp::verifyRegions() { return verifyRegionsCommon(); } //===----------------------------------------------------------------------===// // Verifier for AtomicCaptureOp diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 60416f550ee619..69cbdcd3f536f9 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -1188,9 +1188,6 @@ OpFoldResult vector::ExtractElementOp::fold(FoldAdaptor adaptor) { if (!adaptor.getPosition()) return {}; - Attribute src = adaptor.getVector(); - Attribute pos = adaptor.getPosition(); - // Fold extractelement (splat X) -> X. if (auto splat = getVector().getDefiningOp()) return splat.getInput(); @@ -1200,13 +1197,16 @@ OpFoldResult vector::ExtractElementOp::fold(FoldAdaptor adaptor) { if (!llvm::isa(broadcast.getSource().getType())) return broadcast.getSource(); + auto src = dyn_cast_or_null(adaptor.getVector()); + auto pos = dyn_cast_or_null(adaptor.getPosition()); if (!pos || !src) return {}; - auto srcElements = llvm::cast(src).getValues(); + auto srcElements = src.getValues(); - auto attr = llvm::dyn_cast(pos); - uint64_t posIdx = attr.getInt(); + uint64_t posIdx = pos.getInt(); + if (posIdx >= srcElements.size()) + return {}; return srcElements[posIdx]; } @@ -2511,18 +2511,20 @@ OpFoldResult vector::InsertElementOp::fold(FoldAdaptor adaptor) { if (!adaptor.getPosition()) return {}; - Attribute src = adaptor.getSource(); - Attribute dst = adaptor.getDest(); - Attribute pos = adaptor.getPosition(); + auto src = dyn_cast_or_null(adaptor.getSource()); + auto dst = dyn_cast_or_null(adaptor.getDest()); + auto pos = dyn_cast_or_null(adaptor.getPosition()); if (!src || !dst || !pos) return {}; - auto dstElements = llvm::cast(dst).getValues(); + if (src.getType() != getDestVectorType().getElementType()) + return {}; + + auto dstElements = dst.getValues(); SmallVector results(dstElements); - auto attr = llvm::dyn_cast(pos); - uint64_t posIdx = attr.getInt(); + uint64_t posIdx = pos.getInt(); if (posIdx >= results.size()) return {}; results[posIdx] = src; diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 6c95eb3d20dacd..291624c5480318 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -425,12 +425,13 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) { // Index attributes are treated specially as operand index. 
if (t.getType().isIndex()) { int64_t idx = t.getInt(); - if ((idx < 0) || (idx >= op.getNumOperands())) - return op.emitOpError("invalid operand index"); - if (!emitter.hasValueInScope(op.getOperand(idx))) + Value operand = op.getOperand(idx); + auto literalDef = + dyn_cast_if_present(operand.getDefiningOp()); + if (!literalDef && !emitter.hasValueInScope(operand)) return op.emitOpError("operand ") << idx << "'s value not defined in scope"; - os << emitter.getOrCreateName(op.getOperand(idx)); + os << emitter.getOrCreateName(operand); return success(); } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 1daf60b8659bb6..e6eacaed152287 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1733,12 +1733,13 @@ void collectMapDataFromMapOperands(MapInfoData &mapData, "missing map info operation or incorrect map info operation type"); if (auto mapOp = mlir::dyn_cast_if_present( mapValue.getDefiningOp())) { - mapData.OriginalValue.push_back( - moduleTranslation.lookupValue(mapOp.getVarPtr())); + mapData.OriginalValue.push_back(moduleTranslation.lookupValue( + mapOp.getVarPtr() ? mapOp.getVarPtr() : mapOp.getVal())); mapData.Pointers.push_back(mapData.OriginalValue.back()); if (llvm::Value *refPtr = getRefPtrIfDeclareTarget( - mapOp.getVarPtr(), moduleTranslation)) { // declare target + mapOp.getVarPtr() ? mapOp.getVarPtr() : mapOp.getVal(), + moduleTranslation)) { // declare target mapData.IsDeclareTarget.push_back(true); mapData.BasePointers.push_back(refPtr); } else { // regular mapped variable @@ -1746,10 +1747,14 @@ void collectMapDataFromMapOperands(MapInfoData &mapData, mapData.BasePointers.push_back(mapData.OriginalValue.back()); } - mapData.Sizes.push_back(getSizeInBytes(dl, mapOp.getVarType(), mapOp, - builder, moduleTranslation)); - mapData.BaseType.push_back( - moduleTranslation.convertType(mapOp.getVarType())); + mapData.Sizes.push_back( + getSizeInBytes(dl, + mapOp.getVal() ? mapOp.getVal().getType() + : mapOp.getVarType().value(), + mapOp, builder, moduleTranslation)); + mapData.BaseType.push_back(moduleTranslation.convertType( + mapOp.getVal() ? mapOp.getVal().getType() + : mapOp.getVarType().value())); mapData.MapClause.push_back(mapOp.getOperation()); mapData.Types.push_back( llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value())); @@ -1796,6 +1801,13 @@ static void genMapInfos(llvm::IRBuilderBase &builder, else if (isTargetParams) mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + if (auto mapInfoOp = dyn_cast(mapData.MapClause[i])) + if (mapInfoOp.getMapCaptureType().value() == + mlir::omp::VariableCaptureKind::ByCopy && + !(mapInfoOp.getVarType().has_value() && + mapInfoOp.getVarType()->isa())) + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; + combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]); combinedInfo.Pointers.emplace_back(mapData.Pointers[i]); combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]); @@ -2318,6 +2330,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto targetOp = cast(opInst); auto &targetRegion = targetOp.getRegion(); + DataLayout dl = DataLayout(opInst.getParentOfType()); + SmallVector mapOperands = targetOp.getMapOperands(); + + // Remove mapOperands/blockArgs that have no use inside the region. 
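+ // Erasing an argument shifts the remaining ones down one slot, so step the + // index back to revisit the current position on the next iteration.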
+ assert(mapOperands.size() == targetRegion.getNumArguments() && + "Number of mapOperands must be same as block_arguments"); + for (size_t i = 0; i < mapOperands.size(); i++) { + if (targetRegion.getArgument(i).use_empty()) { + targetRegion.eraseArgument(i); + mapOperands.erase(&mapOperands[i]); + i--; + } + } LogicalResult bodyGenStatus = success(); @@ -2325,6 +2350,16 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) -> InsertPointTy { builder.restoreIP(codeGenIP); + unsigned argIndex = 0; + for (auto &mapOp : mapOperands) { + auto mapInfoOp = + mlir::dyn_cast(mapOp.getDefiningOp()); + llvm::Value *mapOpValue = moduleTranslation.lookupValue( + mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal()); + const auto &arg = targetRegion.front().getArgument(argIndex); + moduleTranslation.mapValue(arg, mapOpValue); + argIndex++; + } llvm::BasicBlock *exitBlock = convertOmpOpRegions( targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus); builder.SetInsertPoint(exitBlock); @@ -2352,8 +2387,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - DataLayout dl = DataLayout(opInst.getParentOfType()); - llvm::SmallVector mapOperands = targetOp.getMapOperands(); MapInfoData mapData; collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl, builder); @@ -2459,6 +2492,8 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, if (declareType == omp::DeclareTargetDeviceType::host) { llvm::Function *llvmFunc = moduleTranslation.lookupFunction(funcOp.getName()); + llvmFunc->replaceAllUsesWith( + llvm::UndefValue::get(llvmFunc->getType())); llvmFunc->dropAllReferences(); llvmFunc->eraseFromParent(); } diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index 8d5dcbf62e85e2..315458a08b613e 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -746,13 +746,27 @@ static int printBuiltinTypes(MlirContext ctx) { fprintf(stderr, "\n"); // CHECK: vector<2x3xf32> + // Scalable vector type. + bool scalable[] = {false, true}; + MlirType scalableVector = mlirVectorTypeGetScalable( + sizeof(shape) / sizeof(int64_t), shape, scalable, f32); + if (!mlirTypeIsAVector(scalableVector)) + return 16; + if (!mlirVectorTypeIsScalable(scalableVector) || + mlirVectorTypeIsDimScalable(scalableVector, 0) || + !mlirVectorTypeIsDimScalable(scalableVector, 1)) + return 17; + mlirTypeDump(scalableVector); + fprintf(stderr, "\n"); + // CHECK: vector<2x[3]xf32> + // Ranked tensor type. 
MlirType rankedTensor = mlirRankedTensorTypeGet( sizeof(shape) / sizeof(int64_t), shape, f32, mlirAttributeGetNull()); if (!mlirTypeIsATensor(rankedTensor) || !mlirTypeIsARankedTensor(rankedTensor) || !mlirAttributeIsNull(mlirRankedTensorTypeGetEncoding(rankedTensor))) - return 16; + return 18; mlirTypeDump(rankedTensor); fprintf(stderr, "\n"); // CHECK: tensor<2x3xf32> @@ -762,7 +776,7 @@ static int printBuiltinTypes(MlirContext ctx) { if (!mlirTypeIsATensor(unrankedTensor) || !mlirTypeIsAUnrankedTensor(unrankedTensor) || mlirShapedTypeHasRank(unrankedTensor)) - return 17; + return 19; mlirTypeDump(unrankedTensor); fprintf(stderr, "\n"); // CHECK: tensor<*xf32> @@ -773,7 +787,7 @@ static int printBuiltinTypes(MlirContext ctx) { f32, sizeof(shape) / sizeof(int64_t), shape, memSpace2); if (!mlirTypeIsAMemRef(memRef) || !mlirAttributeEqual(mlirMemRefTypeGetMemorySpace(memRef), memSpace2)) - return 18; + return 20; mlirTypeDump(memRef); fprintf(stderr, "\n"); // CHECK: memref<2x3xf32, 2> @@ -785,7 +799,7 @@ static int printBuiltinTypes(MlirContext ctx) { mlirTypeIsAMemRef(unrankedMemRef) || !mlirAttributeEqual(mlirUnrankedMemrefGetMemorySpace(unrankedMemRef), memSpace4)) - return 19; + return 21; mlirTypeDump(unrankedMemRef); fprintf(stderr, "\n"); // CHECK: memref<*xf32, 4> @@ -796,7 +810,7 @@ static int printBuiltinTypes(MlirContext ctx) { if (!mlirTypeIsATuple(tuple) || mlirTupleTypeGetNumTypes(tuple) != 2 || !mlirTypeEqual(mlirTupleTypeGetType(tuple, 0), unrankedMemRef) || !mlirTypeEqual(mlirTupleTypeGetType(tuple, 1), f32)) - return 20; + return 22; mlirTypeDump(tuple); fprintf(stderr, "\n"); // CHECK: tuple, f32> @@ -808,16 +822,16 @@ static int printBuiltinTypes(MlirContext ctx) { mlirIntegerTypeGet(ctx, 64)}; MlirType funcType = mlirFunctionTypeGet(ctx, 2, funcInputs, 3, funcResults); if (mlirFunctionTypeGetNumInputs(funcType) != 2) - return 21; + return 23; if (mlirFunctionTypeGetNumResults(funcType) != 3) - return 22; + return 24; if (!mlirTypeEqual(funcInputs[0], mlirFunctionTypeGetInput(funcType, 0)) || !mlirTypeEqual(funcInputs[1], mlirFunctionTypeGetInput(funcType, 1))) - return 23; + return 25; if (!mlirTypeEqual(funcResults[0], mlirFunctionTypeGetResult(funcType, 0)) || !mlirTypeEqual(funcResults[1], mlirFunctionTypeGetResult(funcType, 1)) || !mlirTypeEqual(funcResults[2], mlirFunctionTypeGetResult(funcType, 2))) - return 24; + return 26; mlirTypeDump(funcType); fprintf(stderr, "\n"); // CHECK: (index, i1) -> (i16, i32, i64) @@ -832,7 +846,7 @@ static int printBuiltinTypes(MlirContext ctx) { !mlirStringRefEqual(mlirOpaqueTypeGetDialectNamespace(opaque), namespace) || !mlirStringRefEqual(mlirOpaqueTypeGetData(opaque), data)) - return 25; + return 27; mlirTypeDump(opaque); fprintf(stderr, "\n"); // CHECK: !dialect.type diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index bbf50617edf944..3fbeaebb592a4d 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -244,10 +244,12 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) { // CHECK: %[[ARG_0:.*]]: !llvm.ptr, // CHECK: %[[ARG_1:.*]]: !llvm.ptr) { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(64 : i32) : i32 -// CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} -// CHECK: omp.target thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP]] : !llvm.ptr) { +// 
CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} +// CHECK: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""} +// CHECK: omp.target thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: ^bb0(%[[BB_ARG0]]: !llvm.ptr, %[[BB_ARG1]]: !llvm.ptr): // CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32 -// CHECK: llvm.store %[[VAL_1]], %[[ARG_1]] : i32, !llvm.ptr +// CHECK: llvm.store %[[VAL_1]], %[[BB_ARG1]] : i32, !llvm.ptr // CHECK: omp.terminator // CHECK: } // CHECK: llvm.return @@ -256,9 +258,11 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) { llvm.func @_QPomp_target(%a : !llvm.ptr, %i : !llvm.ptr) { %0 = llvm.mlir.constant(64 : i32) : i32 %1 = omp.map_info var_ptr(%a : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target thread_limit(%0 : i32) map_entries(%1 : !llvm.ptr) { + %3 = omp.map_info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = ""} + omp.target thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): %2 = llvm.mlir.constant(10 : i32) : i32 - llvm.store %2, %i : i32, !llvm.ptr + llvm.store %2, %arg1 : i32, !llvm.ptr omp.terminator } llvm.return @@ -449,7 +453,8 @@ llvm.func @sub_() { // CHECK: %[[C_14:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%[[C_12]] : i64) upper_bound(%[[C_11]] : i64) stride(%[[C_14]] : i64) start_idx(%[[C_14]] : i64) // CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG_2]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} -// CHECK: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target map_entries(%[[MAP0]] -> %[[BB_ARG0:.*]], %[[MAP1]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: ^bb0(%[[BB_ARG0]]: !llvm.ptr, %[[BB_ARG1]]: !llvm.ptr): // CHECK: omp.terminator // CHECK: } // CHECK: llvm.return @@ -468,7 +473,8 @@ llvm.func @_QPtarget_map_with_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: %9 = llvm.mlir.constant(1 : index) : i64 %10 = omp.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%9 : i64) start_idx(%9 : i64) %11 = omp.map_info var_ptr(%arg2 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr {name = ""} - omp.target map_entries(%5, %11 : !llvm.ptr, !llvm.ptr) { + omp.target map_entries(%5 -> %arg3, %11 -> %arg4: !llvm.ptr, !llvm.ptr) { + ^bb0(%arg3: !llvm.ptr, %arg4: !llvm.ptr): omp.terminator } llvm.return diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index a33fff3967e4d5..81178b2ef901f4 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -117,11 +117,10 @@ llvm.mlir.global internal protected unnamed_addr @protected(42 : i32) : i32 // ----- -// expected-error @+1 {{expects type to be a valid element type for an LLVM pointer}} +// expected-error @+1 {{expects type to be a valid element type for an LLVM global}} llvm.mlir.global internal constant @constant(37.0) : !llvm.label // ----- - // expected-error @+1 {{'addr_space' failed 
to satisfy constraint: 32-bit signless integer attribute whose value is non-negative}} "llvm.mlir.global"() ({}) {sym_name = "foo", global_type = i64, value = 42 : i64, addr_space = -1 : i32, linkage = #llvm.linkage} : () -> () diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index fe2f94454561a0..d6960f00f0428c 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -90,30 +90,23 @@ func.func @alloca_non_integer_alignment() { // ----- -func.func @alloca_opaque_ptr_no_type(%sz : i64) { - // expected-error@below {{expected 'elem_type' attribute if opaque pointer type is used}} - "llvm.alloca"(%sz) : (i64) -> !llvm.ptr -} - -// ----- - func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} - llvm.getelementptr %base[%pos] : () -> () + llvm.getelementptr %base[%pos] : () -> (), i64 } // ----- func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} - llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr), i64 } // ----- func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{op requires one result}} - llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () + llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> (), i64 } // ----- @@ -133,15 +126,8 @@ func.func @gep_too_few_dynamic(%base : !llvm.ptr) { // ----- func.func @load_non_llvm_type(%foo : memref) { - // expected-error@+1 {{expected LLVM pointer type}} - llvm.load %foo : memref -} - -// ----- - -func.func @load_non_ptr_type(%foo : f32) { - // expected-error@+1 {{expected LLVM pointer type}} - llvm.load %foo : f32 + // expected-error@+1 {{op operand #0 must be LLVM pointer type}} + llvm.load %foo : memref -> f32 } // ----- @@ -181,27 +167,6 @@ func.func @load_unaligned_atomic(%ptr : !llvm.ptr) { // ----- -func.func @store_non_llvm_type(%foo : memref, %bar : f32) { - // expected-error@+1 {{expected LLVM pointer type}} - llvm.store %bar, %foo : memref -} - -// ----- - -func.func @store_non_ptr_type(%foo : f32, %bar : f32) { - // expected-error@+1 {{expected LLVM pointer type}} - llvm.store %bar, %foo : f32 -} - -// ----- - -func.func @store_malformed_elem_type(%foo: !llvm.ptr, %bar: f32) { - // expected-error@+1 {{expected non-function type}} - llvm.store %bar, %foo : !llvm.ptr, "f32" -} - -// ----- - func.func @store_syncscope(%val : f32, %ptr : !llvm.ptr) { // expected-error@below {{expected syncscope to be null for non-atomic access}} "llvm.store"(%val, %ptr) {syncscope = "singlethread"} : (f32, !llvm.ptr) -> () @@ -632,14 +597,6 @@ func.func @nvvm_invalid_mma_8(%a0 : i32, %a1 : i32, // ----- -func.func @atomicrmw_expected_ptr(%f32 : f32) { - // expected-error@+1 {{operand #0 must be LLVM pointer to floating point LLVM type or LLVM pointer type or integer}} - %0 = "llvm.atomicrmw"(%f32, %f32) {bin_op=11, ordering=1} : (f32, f32) -> f32 - llvm.return -} - -// ----- - func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %f32 : f32) { // expected-error@+1 {{op failed to verify that result #0 and operand #1 have the same type}} %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr, f32) -> i32 @@ -672,14 +629,6 @@ func.func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : f32) { // ----- -func.func @cmpxchg_expected_ptr(%f32 : f32) { - // expected-error@+1 {{op operand #0 must be LLVM pointer to 
integer or LLVM pointer type}} - %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (f32, f32, f32) -> !llvm.struct<(f32, i1)> - llvm.return -} - -// ----- - func.func @cmpxchg_mismatched_value_operands(%ptr : !llvm.ptr, %i32 : i32, %i64 : i64) { // expected-error@+1 {{op failed to verify that operand #1 and operand #2 have the same type}} %0 = "llvm.cmpxchg"(%ptr, %i32, %i64) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i64) -> !llvm.struct<(i32, i1)> diff --git a/mlir/test/Dialect/MemRef/sroa.mlir b/mlir/test/Dialect/MemRef/sroa.mlir index d78053d8ea777e..40ab9b3483b833 100644 --- a/mlir/test/Dialect/MemRef/sroa.mlir +++ b/mlir/test/Dialect/MemRef/sroa.mlir @@ -132,9 +132,9 @@ func.func @no_dynamic_shape(%arg0: i32, %arg1: i32) -> i32 { // ----- -// CHECK-LABEL: func.func @no_out_of_bounds +// CHECK-LABEL: func.func @no_out_of_bound_write // CHECK-SAME: (%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32) -func.func @no_out_of_bounds(%arg0: i32, %arg1: i32) -> i32 { +func.func @no_out_of_bound_write(%arg0: i32, %arg1: i32) -> i32 { // CHECK: %[[C0:.*]] = arith.constant 0 : index %c0 = arith.constant 0 : index // CHECK: %[[C100:.*]] = arith.constant 100 : index @@ -152,3 +152,24 @@ func.func @no_out_of_bounds(%arg0: i32, %arg1: i32) -> i32 { // CHECK: return %[[RES]] : i32 return %res : i32 } + +// ----- + +// CHECK-LABEL: func.func @no_out_of_bound_load +// CHECK-SAME: (%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32) +func.func @no_out_of_bound_load(%arg0: i32, %arg1: i32) -> i32 { + // CHECK: %[[C0:.*]] = arith.constant 0 : index + %c0 = arith.constant 0 : index + // CHECK: %[[C100:.*]] = arith.constant 100 : index + %c100 = arith.constant 100 : index + // CHECK-NOT: = memref.alloca() + // CHECK: %[[ALLOCA:.*]] = memref.alloca() : memref<2xi32> + // CHECK-NOT: = memref.alloca() + %alloca = memref.alloca() : memref<2xi32> + // CHECK: memref.store %[[ARG0]], %[[ALLOCA]][%[[C0]]] + memref.store %arg0, %alloca[%c0] : memref<2xi32> + // CHECK: %[[RES:.*]] = memref.load %[[ALLOCA]][%[[C100]]] + %res = memref.load %alloca[%c100] : memref<2xi32> + // CHECK: return %[[RES]] : i32 + return %res : i32 +} diff --git a/mlir/test/Dialect/OpenMP/canonicalize.mlir b/mlir/test/Dialect/OpenMP/canonicalize.mlir index 8aff8f81188be5..de6c931ecc5fd9 100644 --- a/mlir/test/Dialect/OpenMP/canonicalize.mlir +++ b/mlir/test/Dialect/OpenMP/canonicalize.mlir @@ -131,8 +131,9 @@ func.func private @foo() -> () func.func @constant_hoisting_target(%x : !llvm.ptr) { omp.target { + ^bb0(%arg0: !llvm.ptr): %c1 = arith.constant 10 : i32 - llvm.store %c1, %x : i32, !llvm.ptr + llvm.store %c1, %arg0 : i32, !llvm.ptr omp.terminator } return @@ -141,4 +142,4 @@ func.func @constant_hoisting_target(%x : !llvm.ptr) { // CHECK-LABEL: func.func @constant_hoisting_target // CHECK-NOT: arith.constant // CHECK: omp.target -// CHECK-NEXT: arith.constant +// CHECK: arith.constant diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 6f75f2a62e6413..42e9fb1c64baec 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1617,7 +1617,9 @@ func.func @omp_threadprivate() { func.func @omp_target(%map1: memref) { %mapv = omp.map_info var_ptr(%map1 : memref, tensor) map_clauses(delete) capture(ByRef) -> memref {name = ""} // expected-error @below {{to, from, tofrom and alloc map types are permitted}} - omp.target map_entries(%mapv : memref){} + omp.target map_entries(%mapv -> %arg0: memref) { + ^bb0(%arg0: memref): + } return } @@ -1656,4 
+1658,40 @@ func.func @omp_target_exit_data(%map1: memref) { return } +// ----- + +func.func @omp_map1(%map1: memref, %map2: i32) { + %mapv = omp.map_info var_ptr(%map1 : memref, tensor) val(%map2 : i32) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} + // expected-error @below {{only one of val or var_ptr must be used}} + omp.target map_entries(%mapv -> %arg0: memref) { + ^bb0(%arg0: memref): + omp.terminator + } + return +} + +// ----- + +func.func @omp_map2(%map1: memref, %map2: i32) { + %mapv = omp.map_info var_ptr( : , tensor) val(%map2 : i32) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} + // expected-error @below {{var_type supplied without var_ptr}} + omp.target map_entries(%mapv -> %arg0: memref) { + ^bb0(%arg0: memref): + omp.terminator + } + return +} + +// ----- + +func.func @omp_map3(%map1: memref, %map2: i32) { + %mapv = omp.map_info map_clauses(tofrom) capture(ByRef) -> memref {name = ""} + // expected-error @below {{missing val or var_ptr}} + omp.target map_entries(%mapv -> %arg0: memref) { + ^bb0(%arg0: memref): + omp.terminator + } + return +} + llvm.mlir.global internal @_QFsubEx() : i32 diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index d59a4f428118bf..4d88d9ac86fe16 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -492,16 +492,22 @@ func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %map1: // Test with optional map clause. // CHECK: %[[MAP_A:.*]] = omp.map_info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_B:.*]] = omp.map_info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target map_entries(%[[MAP_A]], %[[MAP_B]] : memref, memref) { + // CHECK: omp.target map_entries(%[[MAP_A]] -> {{.*}}, %[[MAP_B]] -> {{.*}} : memref, memref) { %mapv1 = omp.map_info var_ptr(%map1 : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} %mapv2 = omp.map_info var_ptr(%map2 : memref, tensor) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref {name = ""} - omp.target map_entries(%mapv1, %mapv2 : memref, memref){} + omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) { + ^bb0(%arg0: memref, %arg1: memref): + omp.terminator + } // CHECK: %[[MAP_C:.*]] = omp.map_info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(to) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_D:.*]] = omp.map_info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(always, from) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target map_entries(%[[MAP_C]], %[[MAP_D]] : memref, memref) { + // CHECK: omp.target map_entries(%[[MAP_C]] -> {{.*}}, %[[MAP_D]] -> {{.*}} : memref, memref) { %mapv3 = omp.map_info var_ptr(%map1 : memref, tensor) map_clauses(to) capture(ByRef) -> memref {name = ""} %mapv4 = omp.map_info var_ptr(%map2 : memref, tensor) map_clauses(always, from) capture(ByRef) -> memref {name = ""} - omp.target map_entries(%mapv3, %mapv4 : memref, memref) {} + omp.target map_entries(%mapv3 -> %arg0, %mapv4 -> %arg1 : memref, memref) { + ^bb0(%arg0: memref, %arg1: memref): + omp.terminator + } // CHECK: omp.barrier omp.barrier @@ -2055,8 +2061,11 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () %10 = omp.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%8 : i64) start_idx(%9 : i64) %mapv2 = omp.map_info var_ptr(%arg1 : !llvm.ptr, 
!llvm.array<10 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByCopy) bounds(%10) -> !llvm.ptr {name = ""} - // CHECK: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr, !llvm.ptr) - omp.target map_entries(%mapv1, %mapv2 : !llvm.ptr, !llvm.ptr){} + // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr) + omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg2: !llvm.ptr, %arg3: !llvm.ptr): + omp.terminator + } // CHECK: omp.target_data map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr, !llvm.ptr) omp.target_data map_entries(%mapv1, %mapv2 : !llvm.ptr, !llvm.ptr){} diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index f6bb42b1b24915..163fdd67b0cfd3 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -2027,6 +2027,46 @@ func.func @insert_element_invalid_fold() -> vector<1xf32> { return %46 : vector<1xf32> } + +// ----- + +// Do not crash on poison +// CHECK-LABEL: func @insert_poison_fold1 +// CHECK: vector.insertelement +func.func @insert_poison_fold1() -> vector<4xi32> { + %v = ub.poison : vector<4xi32> + %s = arith.constant 7 : i32 + %i = arith.constant 2 : i32 + %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> + return %1 : vector<4xi32> +} + +// ----- + +// Do not crash on poison +// CHECK-LABEL: func @insert_poison_fold2 +// CHECK: vector.insertelement +func.func @insert_poison_fold2() -> vector<4xi32> { + %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> + %s = ub.poison : i32 + %i = arith.constant 2 : i32 + %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> + return %1 : vector<4xi32> +} + +// ----- + +// Do not crash on poison +// CHECK-LABEL: func @insert_poison_fold3 +// CHECK: vector.insertelement +func.func @insert_poison_fold3() -> vector<4xi32> { + %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> + %s = arith.constant 7 : i32 + %i = ub.poison : i32 + %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> + return %1 : vector<4xi32> +} + // ----- // CHECK-LABEL: func @extract_element_fold @@ -2051,6 +2091,30 @@ func.func @extract_element_splat_fold(%a : i32) -> i32 { // ----- +// Do not crash on poison +// CHECK-LABEL: func @extract_element_poison_fold1 +// CHECK: vector.extractelement +func.func @extract_element_poison_fold1() -> i32 { + %v = ub.poison : vector<4xi32> + %i = arith.constant 2 : i32 + %1 = vector.extractelement %v[%i : i32] : vector<4xi32> + return %1 : i32 +} + +// ----- + +// Do not crash on poison +// CHECK-LABEL: func @extract_element_poison_fold2 +// CHECK: vector.extractelement +func.func @extract_element_poison_fold2() -> i32 { + %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32> + %i = ub.poison : i32 + %1 = vector.extractelement %v[%i : i32] : vector<4xi32> + return %1 : i32 +} + +// ----- + // CHECK-LABEL: func @reduce_one_element_vector_extract // CHECK-SAME: (%[[V:.+]]: vector<1xf32>) // CHECK: %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32> @@ -2436,4 +2500,4 @@ func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: te permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} : tensor<4x4x4xf32>, vector<1x100x4x5xf32> return %r : vector<1x100x4x5xf32> -} \ No newline at end of file +} diff --git a/mlir/test/Target/Cpp/literal_call_operand.mlir b/mlir/test/Target/Cpp/literal_call_operand.mlir index 017b4d53c43e3d..428b66bb2519d9 100644 --- 
a/mlir/test/Target/Cpp/literal_call_operand.mlir +++ b/mlir/test/Target/Cpp/literal_call_operand.mlir @@ -12,3 +12,15 @@ func.func @emitc_call_operand() { // CPP-DECLTOP: void emitc_call_operand() { // CPP-DECLTOP-NEXT: float v1; // CPP-DECLTOP-NEXT: v1 = foo(M_PI); + +func.func @emitc_call_operand_arg() { + %p0 = emitc.literal "M_PI" : f32 + %1 = emitc.call "bar"(%p0) {args = [42 : i32, 0 : index]} : (f32) -> f32 + return +} +// CPP-DEFAULT: void emitc_call_operand_arg() { +// CPP-DEFAULT-NEXT: float v1 = bar(42, M_PI); + +// CPP-DECLTOP: void emitc_call_operand_arg() { +// CPP-DECLTOP-NEXT: float v1; +// CPP-DECLTOP-NEXT: v1 = bar(42, M_PI); diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir index 056085123480ba..307d8a02ce61da 100644 --- a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir @@ -1,10 +1,10 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s // This test checks the offload sizes provided to the OpenMP kernel argument -// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds -// provided for a 3-D array. One with full default size, and the other with -// a user specified OpenMP array sectioning. We expect the default sized -// array bounds to lower to the full size of the array and the sectioned +// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds +// provided for a 3-D array. One with full default size, and the other with +// a user specified OpenMP array sectioning. We expect the default sized +// array bounds to lower to the full size of the array and the sectioned // array to be the size of 3*3*1*element-byte-size (36 bytes in this case). 
module attributes {omp.is_target_device = false} { @@ -18,12 +18,13 @@ module attributes {omp.is_target_device = false} { %6 = omp.bounds lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64) %7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"} %8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"} - omp.target map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) { + omp.target map_entries(%7 -> %arg0, %8 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): %9 = llvm.mlir.constant(0 : i64) : i64 %10 = llvm.mlir.constant(1 : i64) : i64 - %11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> + %11 = llvm.getelementptr %arg0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> %12 = llvm.load %11 : !llvm.ptr -> i32 - %13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> + %13 = llvm.getelementptr %arg1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> llvm.store %12, %13 : i32, !llvm.ptr omp.terminator } diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir index c0c8640bb30bda..875d04f584ca96 100644 --- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir @@ -6,9 +6,10 @@ module attributes {omp.is_target_device = true} { %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} - omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { - %4 = llvm.load %0 : !llvm.ptr -> i32 - llvm.store %4, %1 : i32, !llvm.ptr + omp.target map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %4 = llvm.load %arg1 : !llvm.ptr -> i32 + llvm.store %4, %arg0 : i32, !llvm.ptr omp.terminator } llvm.return @@ -32,7 +33,7 @@ module attributes {omp.is_target_device = true} { // CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8 // CHECK: user_code.entry: ; preds = %entry -// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 +// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 // CHECK: br label %omp.target // CHECK: omp.target: ; preds = %user_code.entry diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir index ca5dad8b4fc9a8..c8fb4e232f06f5 100644 --- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir @@ -6,9 +6,10 @@ module attributes {omp.is_target_device = false} { %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) 
capture(ByCopy) -> !llvm.ptr {name = "i"} - omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { - %4 = llvm.load %0 : !llvm.ptr -> i32 - llvm.store %4, %1 : i32, !llvm.ptr + omp.target map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %4 = llvm.load %arg1 : !llvm.ptr -> i32 + llvm.store %4, %arg0 : i32, !llvm.ptr omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir index 24795cf70c009e..cf08761981fb3a 100644 --- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir @@ -1,10 +1,10 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s // This test the generation of additional load operations for declare target link variables -// inside of target op regions when lowering to IR for device. Unfortunately as the host file is not +// inside of target op regions when lowering to IR for device. Unfortunately as the host file is not // passed as a module attribute, we miss out on the metadata and entryinfo. // -// Unfortunately, only so much can be tested as the device side is dependent on a *.bc +// Unfortunately, only so much can be tested as the device side is dependent on a *.bc // file created by the host and appended as an attribute to the module. module attributes {omp.is_target_device = true} { @@ -13,18 +13,19 @@ module attributes {omp.is_target_device = true} { %0 = llvm.mlir.constant(0 : i32) : i32 llvm.return %0 : i32 } - + llvm.func @_QQmain() attributes {} { %0 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr - + // CHECK-DAG: omp.target: ; preds = %user_code.entry // CHECK-DAG: %[[V:.*]] = load ptr, ptr @_QMtest_0Esp_decl_tgt_ref_ptr, align 8 // CHECK-DAG: store i32 1, ptr %[[V]], align 4 // CHECK-DAG: br label %omp.region.cont %map = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map : !llvm.ptr) { + omp.target map_entries(%map -> %arg0 : !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr): %1 = llvm.mlir.constant(1 : i32) : i32 - llvm.store %1, %0 : i32, !llvm.ptr + llvm.store %1, %arg0 : i32, !llvm.ptr omp.terminator } diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index bd399ad935259c..78bab6ece73e6b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -15,11 +15,12 @@ module attributes {omp.is_target_device = true} { %map1 = omp.map_info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map_info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map_info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map1, %map2, %map3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - %8 = llvm.load %3 : !llvm.ptr -> i32 - %9 = llvm.load %5 : !llvm.ptr -> i32 + omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): + %8 = llvm.load %arg0 : !llvm.ptr -> i32 + %9 = llvm.load %arg1 : !llvm.ptr -> i32 %10 = llvm.add %8, %9 : i32 - llvm.store %10, %7 : i32, !llvm.ptr + llvm.store %10, %arg2 : i32, !llvm.ptr omp.terminator } 
llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir index 2cd0331087ec01..6fa039f522e206 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir @@ -3,10 +3,11 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s module attributes {omp.is_target_device = true} { - llvm.func @writeindex_omp_outline_0_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) attributes {omp.outline_parent_name = "writeindex_"} { - %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %1 = omp.map_info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%0, %1 : !llvm.ptr, !llvm.ptr) { + llvm.func @writeindex_omp_outline_0_(%val0: !llvm.ptr, %val1: !llvm.ptr) attributes {omp.outline_parent_name = "writeindex_"} { + %0 = omp.map_info var_ptr(%val0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %1 = omp.map_info var_ptr(%val1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target map_entries(%0 -> %arg0, %1 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): %2 = llvm.mlir.constant(20 : i32) : i32 %3 = llvm.mlir.constant(10 : i32) : i32 llvm.store %3, %arg0 : i32, !llvm.ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir index 4e89b8585c7525..b861dd7a7d315f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir @@ -15,11 +15,12 @@ module attributes {omp.is_target_device = false} { %map1 = omp.map_info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map_info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map_info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map1, %map2, %map3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - %8 = llvm.load %3 : !llvm.ptr -> i32 - %9 = llvm.load %5 : !llvm.ptr -> i32 + omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): + %8 = llvm.load %arg0 : !llvm.ptr -> i32 + %9 = llvm.load %arg1 : !llvm.ptr -> i32 %10 = llvm.add %8, %9 : i32 - llvm.store %10, %7 : i32, !llvm.ptr + llvm.store %10, %arg2 : i32, !llvm.ptr omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir index 1d8799ecd446f0..c80ea1f0a47be7 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -15,12 +15,13 @@ module attributes {omp.is_target_device = false} { %map1 = omp.map_info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map_info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map_info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries( %map1, %map2, %map3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target map_entries( %map1 -> %arg0, %map2 -> %arg1, 
%map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): omp.parallel { - %8 = llvm.load %3 : !llvm.ptr -> i32 - %9 = llvm.load %5 : !llvm.ptr -> i32 + %8 = llvm.load %arg0 : !llvm.ptr -> i32 + %9 = llvm.load %arg1 : !llvm.ptr -> i32 %10 = llvm.add %8, %9 : i32 - llvm.store %10, %7 : i32, !llvm.ptr + llvm.store %10, %arg2 : i32, !llvm.ptr omp.terminator } omp.terminator diff --git a/mlir/test/python/ir/builtin_types.py b/mlir/test/python/ir/builtin_types.py index 672418b5383ae4..d4fed86b4f135e 100644 --- a/mlir/test/python/ir/builtin_types.py +++ b/mlir/test/python/ir/builtin_types.py @@ -300,7 +300,7 @@ def testVectorType(): none = NoneType.get() try: - vector_invalid = VectorType.get(shape, none) + VectorType.get(shape, none) except MLIRError as e: # CHECK: Invalid type: # CHECK: error: unknown: vector elements must be int/index/float type but got 'none' @@ -308,6 +308,46 @@ def testVectorType(): else: print("Exception not produced") + scalable_1 = VectorType.get(shape, f32, scalable=[False, True]) + scalable_2 = VectorType.get([2, 3, 4], f32, scalable=[True, False, True]) + assert scalable_1.scalable + assert scalable_2.scalable + assert scalable_1.scalable_dims == [False, True] + assert scalable_2.scalable_dims == [True, False, True] + # CHECK: scalable 1: vector<2x[3]xf32> + print("scalable 1: ", scalable_1) + # CHECK: scalable 2: vector<[2]x3x[4]xf32> + print("scalable 2: ", scalable_2) + + scalable_3 = VectorType.get(shape, f32, scalable_dims=[1]) + scalable_4 = VectorType.get([2, 3, 4], f32, scalable_dims=[0, 2]) + assert scalable_3 == scalable_1 + assert scalable_4 == scalable_2 + + try: + VectorType.get(shape, f32, scalable=[False, True, True]) + except ValueError as e: + # CHECK: Expected len(scalable) == len(shape). + print(e) + else: + print("Exception not produced") + + try: + VectorType.get(shape, f32, scalable=[False, True], scalable_dims=[1]) + except ValueError as e: + # CHECK: kwargs are mutually exclusive. + print(e) + else: + print("Exception not produced") + + try: + VectorType.get(shape, f32, scalable_dims=[42]) + except ValueError as e: + # CHECK: Scalable dimension index out of bounds. + print(e) + else: + print("Exception not produced") + # CHECK-LABEL: TEST: testRankedTensorType @run @@ -337,7 +377,6 @@ def testRankedTensorType(): assert RankedTensorType.get(shape, f32).encoding is None - # CHECK-LABEL: TEST: testUnrankedTensorType @run def testUnrankedTensorType(): diff --git a/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp b/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp index 2d9f8d0e607499..083dec819a0e05 100644 --- a/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp +++ b/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp @@ -17,37 +17,3 @@ TEST_F(LLVMIRTest, IsStructTypeMutable) { ASSERT_TRUE(bool(structTy)); ASSERT_TRUE(structTy.hasTrait()); } - -TEST_F(LLVMIRTest, MutualReferencedSubElementTypes) { - auto fooStructTy = LLVMStructType::getIdentified(&context, "foo"); - ASSERT_TRUE(bool(fooStructTy)); - auto barStructTy = LLVMStructType::getIdentified(&context, "bar"); - ASSERT_TRUE(bool(barStructTy)); - - // Created two structs that are referencing each other. - Type fooBody[] = {LLVMPointerType::get(barStructTy)}; - ASSERT_TRUE(succeeded(fooStructTy.setBody(fooBody, /*isPacked=*/false))); - Type barBody[] = {LLVMPointerType::get(fooStructTy)}; - ASSERT_TRUE(succeeded(barStructTy.setBody(barBody, /*isPacked=*/false))); - - // Test if walkSubElements goes into infinite loops. 
diff --git a/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp b/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp
index 2d9f8d0e607499..083dec819a0e05 100644
--- a/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp
+++ b/mlir/unittests/Dialect/LLVMIR/LLVMTypeTest.cpp
@@ -17,37 +17,3 @@ TEST_F(LLVMIRTest, IsStructTypeMutable) {
   ASSERT_TRUE(bool(structTy));
   ASSERT_TRUE(structTy.hasTrait<TypeTrait::IsMutable>());
 }
-
-TEST_F(LLVMIRTest, MutualReferencedSubElementTypes) {
-  auto fooStructTy = LLVMStructType::getIdentified(&context, "foo");
-  ASSERT_TRUE(bool(fooStructTy));
-  auto barStructTy = LLVMStructType::getIdentified(&context, "bar");
-  ASSERT_TRUE(bool(barStructTy));
-
-  // Created two structs that are referencing each other.
-  Type fooBody[] = {LLVMPointerType::get(barStructTy)};
-  ASSERT_TRUE(succeeded(fooStructTy.setBody(fooBody, /*isPacked=*/false)));
-  Type barBody[] = {LLVMPointerType::get(fooStructTy)};
-  ASSERT_TRUE(succeeded(barStructTy.setBody(barBody, /*isPacked=*/false)));
-
-  // Test if walkSubElements goes into infinite loops.
-  SmallVector<Type> subElementTypes;
-  fooStructTy.walk([&](Type type) { subElementTypes.push_back(type); });
-  ASSERT_EQ(subElementTypes.size(), 4U);
-
-  // !llvm.ptr
-  ASSERT_TRUE(isa<LLVMPointerType>(subElementTypes[0]));
-
-  // !llvm.struct<"bar",...>
-  auto structType = dyn_cast<LLVMStructType>(subElementTypes[1]);
-  ASSERT_TRUE(bool(structType));
-  ASSERT_TRUE(structType.getName().equals("bar"));
-
-  // !llvm.ptr
-  ASSERT_TRUE(isa<LLVMPointerType>(subElementTypes[2]));
-
-  // !llvm.struct<"foo",...>
-  structType = dyn_cast<LLVMStructType>(subElementTypes[3]);
-  ASSERT_TRUE(bool(structType));
-  ASSERT_TRUE(structType.getName().equals("foo"));
-}
diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h
index a06de60b7b542e..33f7dfc7660cd2 100644
--- a/openmp/libomptarget/DeviceRTL/include/Debug.h
+++ b/openmp/libomptarget/DeviceRTL/include/Debug.h
@@ -37,7 +37,8 @@ void __assert_fail_internal(const char *expr, const char *msg, const char *file,
 }
 
 #define UNREACHABLE(msg)                                                       \
   PRINT(msg);                                                                  \
-  __builtin_trap();
+  __builtin_trap();                                                            \
+  __builtin_unreachable();
 
 ///}
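On the `UNREACHABLE` change above: `__builtin_trap()` normally ends execution, but (plausibly the motivation here) some offload backends do not model the trap as terminating the thread, so the added `__builtin_unreachable()` tells the optimizer that control never falls through and no dead continuation code needs to be emitted. A minimal host-side sketch, with `fprintf` standing in for the runtime's `PRINT`:

```cpp
// Sketch only; mirrors the macro shape from the patch, not the real header.
#include <cstdio>

#define UNREACHABLE(msg)                                                       \
  fprintf(stderr, "%s\n", msg);                                                \
  __builtin_trap();                                                            \
  __builtin_unreachable();

int unwrap(const int *p) {
  if (!p) {
    // Braces are required: the macro expands to multiple statements.
    UNREACHABLE("unwrap: null pointer");
  }
  return *p;
}
```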
diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
index 44ff394dcda16b..abc2763d4a30cc 100644
--- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
@@ -23,7 +23,7 @@ program main
     end do
 
 i = 1
-j = 1 
+j = 1
 k = 1
 
 !$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2)
@@ -50,5 +50,12 @@ program main
       end do
     end do
 end program main
-
-! CHECK: 1 2 3 4 5 6 7 8
+
+! CHECK: 1
+! CHECK: 2
+! CHECK: 3
+! CHECK: 4
+! CHECK: 5
+! CHECK: 6
+! CHECK: 7
+! CHECK: 8
diff --git a/openmp/libomptarget/test/offloading/fortran/failing/target_map_common_block2.f90 b/openmp/libomptarget/test/offloading/fortran/target_map_common_block2.f90
similarity index 85%
rename from openmp/libomptarget/test/offloading/fortran/failing/target_map_common_block2.f90
rename to openmp/libomptarget/test/offloading/fortran/target_map_common_block2.f90
index 1f95ef7c460757..8a9c47545fbd78 100644
--- a/openmp/libomptarget/test/offloading/fortran/failing/target_map_common_block2.f90
+++ b/openmp/libomptarget/test/offloading/fortran/target_map_common_block2.f90
@@ -7,7 +7,6 @@
 ! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
-! XFAIL: *
 
 program main
   use omp_lib
@@ -15,11 +14,11 @@ program main
   common var4
   var4 = 24
   tmp = 12
-  print *, "var4 before target = ", var4
+  print *, "var4 before target =", var4
 !$omp target map(tofrom:var4)
   var4 = tmp
 !$omp end target
-  print *, "var4 after target = ", var4
+  print *, "var4 after target =", var4
 end program
 
 ! CHECK: var4 before target = 24
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 413c516185a071..2f92f98dd413be 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5318,6 +5318,7 @@ cc_library(
         ":GPUDialect",
         ":GPUPassIncGen",
         ":Pass",
+        ":SPIRVDialect",
        ":Support",
         "//llvm:Support",
     ],
@@ -5945,6 +5946,7 @@ cc_library(
         ":NVVMToLLVMIRTranslation",
         ":TargetLLVM",
         ":ToLLVMIRTranslation",
+        "//llvm:NVPTXCodeGen",
         "//llvm:Support",
     ],
 )
@@ -6528,11 +6530,12 @@ cc_library(
 
 cc_library(
     name = "SPIRVTarget",
     srcs = ["lib/Target/SPIRV/Target.cpp"],
-    hdrs = glob(["include/mlir/Target/SPIRV/*.h"]),
+    hdrs = ["include/mlir/Target/SPIRV/Target.h"],
    includes = ["include"],
    deps = [
         ":GPUDialect",
         ":SPIRVDialect",
+        ":SPIRVSerialization",
     ],
 )