Skip to content

Commit

Permalink
[mlir][LLVMIR][OpenMP] Move reduction allocas before stores (llvm#105683
Browse files Browse the repository at this point in the history
)

Delay implicit reduction var stores until after all allocas. This fixes
a couple of cases in existing unit tests.

Follow up to llvm#102524
  • Loading branch information
tblah committed Aug 27, 2024
1 parent c50d11e commit 7c188ab
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 15 deletions.
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ end subroutine proc
!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]]
!CHECK: %[[F_priv:.*]] = alloca ptr
!CHECK: %[[I_priv:.*]] = alloca i32
!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]

!CHECK: omp.reduction.init:
!CHECK: store i32 0, ptr %[[I_priv]]
!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
!CHECK: store i32 0, ptr %[[I_priv]]

!CHECK: omp.par.region:
!CHECK: br label %[[MALLOC_BB:.*]]
Expand Down
47 changes: 35 additions & 12 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,20 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
return bodyGenStatus;
}

namespace {
/// Contains the arguments for an LLVM store operation
struct DeferredStore {
DeferredStore(llvm::Value *value, llvm::Value *address)
: value(value), address(address) {}

llvm::Value *value;
llvm::Value *address;
};
} // namespace

/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
template <typename T>
static LogicalResult
allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
Expand All @@ -602,13 +615,13 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
SmallVectorImpl<llvm::Value *> &privateReductionVariables,
DenseMap<Value, llvm::Value *> &reductionVariableMap,
SmallVectorImpl<DeferredStore> &deferredStores,
llvm::ArrayRef<bool> isByRefs) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

// delay creating stores until after all allocas
SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
storesToCreate.reserve(loop.getNumReductionVars());
deferredStores.reserve(loop.getNumReductionVars());

for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
Region &allocRegion = reductionDecls[i].getAllocRegion();
Expand All @@ -628,7 +641,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
// variable allocated in the inlined region)
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
storesToCreate.emplace_back(phis[0], var);
deferredStores.emplace_back(phis[0], var);

privateReductionVariables[i] = var;
moduleTranslation.mapValue(reductionArgs[i], phis[0]);
Expand All @@ -644,10 +657,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
}
}

// TODO: further delay this so it doesn't come in the entry block at all
for (auto [data, addr] : storesToCreate)
builder.CreateStore(data, addr);

return success();
}

Expand Down Expand Up @@ -819,12 +828,19 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();

SmallVector<DeferredStore> deferredStores;

if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
allocaIP, reductionDecls,
privateReductionVariables, reductionVariableMap,
isByRef)))
deferredStores, isByRef)))
return failure();

// store result of the alloc region to the allocated pointer to the real
// reduction variable
for (auto [data, addr] : deferredStores)
builder.CreateStore(data, addr);

// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
Expand Down Expand Up @@ -1359,6 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
collectReductionDecls(opInst, reductionDecls);
SmallVector<llvm::Value *> privateReductionVariables(
opInst.getNumReductionVars());
SmallVector<DeferredStore> deferredStores;

auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Allocate reduction vars
Expand All @@ -1373,10 +1390,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
InsertPointTy(allocaIP.getBlock(),
allocaIP.getBlock()->getTerminator()->getIterator());

if (failed(allocReductionVars(opInst, reductionArgs, builder,
moduleTranslation, allocaIP, reductionDecls,
privateReductionVariables,
reductionVariableMap, isByRef)))
if (failed(allocReductionVars(
opInst, reductionArgs, builder, moduleTranslation, allocaIP,
reductionDecls, privateReductionVariables, reductionVariableMap,
deferredStores, isByRef)))
bodyGenStatus = failure();

// Initialize reduction vars
Expand All @@ -1401,6 +1418,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,

builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());

// insert stores deferred until after all allocas
// these store the results of the alloc region into the allocation for the
// pointer to the reduction variable
for (auto [data, addr] : deferredStores)
builder.CreateStore(data, addr);

for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4
// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_15:.*]]
// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]]
Expand All @@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: br label %[[VAL_18:.*]]
// CHECK: omp.par.region1: ; preds = %[[VAL_17]]
// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
// CHECK: br label %[[VAL_22:.*]]
// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]]
// CHECK: store i32 0, ptr %[[VAL_7]], align 4
Expand Down

0 comments on commit 7c188ab

Please sign in to comment.