Skip to content

Commit

Permalink
[mlir][AMDGPU] New gfx12 barrier instructions and update lowering LDS…
Browse files Browse the repository at this point in the history
…BarrierOp (llvm#109273)

Add the new gfx12 barrier instructions s.barrier.signal, s.barrier.wait, and
s.wait.dscnt, and update the LDSBarrierOp lowering accordingly.

CC: @krzysz00 @manupak @giuseros
  • Loading branch information
dhernandez0 authored Sep 20, 2024
1 parent c24418a commit b014265
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 22 deletions.
25 changes: 23 additions & 2 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,12 @@ class ROCDL_IntrPure1Op<string mnemonic> :

class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0> :
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
overloadedOperands, traits, numResults, requiresAccessGroup,
requiresAliasAnalysis>;
requiresAliasAnalysis, 0, immArgPositions, immArgAttrNames>;

//===----------------------------------------------------------------------===//
// ROCDL special register op definitions
Expand Down Expand Up @@ -255,6 +256,26 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
let assemblyFormat = "attr-dict";
}

// ROCDL op for the `s.barrier.signal` intrinsic (translated to a call to
// `llvm.amdgcn.s.barrier.signal`). The trailing `[0]` / `["id"]` arguments to
// ROCDL_IntrOp declare that intrinsic argument 0 is an immediate whose value
// comes from the `id` attribute, so the op takes no SSA operands.
def ROCDL_BarrierSignalOp : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>,
Arguments<(ins I32Attr:$id)> {
// The op produces no results.
let results = (outs);
// Textual form: `rocdl.s.barrier.signal <id>` followed by the attribute dict.
let assemblyFormat = "$id attr-dict";
}

// ROCDL op for the `s.barrier.wait` intrinsic (translated to a call to
// `llvm.amdgcn.s.barrier.wait`). The `id` attribute is a 16-bit integer
// attribute and is declared as the immediate for intrinsic argument 0.
def ROCDL_BarrierWaitOp : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0, 0, 0, [0], ["id"]>,
Arguments<(ins I16Attr:$id)> {
// The op produces no results.
let results = (outs);
// Textual form: `rocdl.s.barrier.wait <id>` followed by the attribute dict.
let assemblyFormat = "$id attr-dict";
// Custom translation snippet: emits the `amdgcn_s_barrier_wait` intrinsic call
// directly, materializing `id` as an i16 constant via `builder.getInt16`.
// NOTE(review): the reason the default ROCDL_IntrOp builder is overridden here
// (but not for `s.wait.dscnt`, which also uses I16Attr) is not visible in this
// hunk -- confirm whether the override is still required.
string llvmBuilder =
"createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));";
}

// ROCDL op for the `s.wait.dscnt` intrinsic (translated to a call to
// `llvm.amdgcn.s.wait.dscnt`). The `id` attribute is a 16-bit integer
// attribute and is declared as the immediate for intrinsic argument 0.
// NOTE(review): the attribute is named `id` like the barrier ops, although the
// value is passed to a wait-count intrinsic -- confirm the intended name.
def ROCDL_WaitDscntOp: ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>,
Arguments<(ins I16Attr:$id)> {
// The op produces no results.
let results = (outs);
// Textual form: `rocdl.s.wait.dscnt <id>` followed by the attribute dict.
let assemblyFormat = "$id attr-dict";
}

def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>,
Arguments<(ins I16Attr:$priority)> {
let results = (outs);
Expand Down
48 changes: 28 additions & 20 deletions mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,27 +301,35 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
/*operand_attrs=*/ArrayAttr());
return success();
}
constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
// Left in place in case someone disables the inline ASM path or future
// chipsets use the same bit pattern.
constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);

int32_t ldsOnlyBits;
if (chipset.majorVersion == 11)
ldsOnlyBits = ldsOnlyBitsGfx11;
else if (chipset.majorVersion == 10)
ldsOnlyBits = ldsOnlyBitsGfx10;
else if (chipset.majorVersion <= 9)
ldsOnlyBits = ldsOnlyBitsGfx6789;
else
return op.emitOpError(
"don't know how to lower this for chipset major version")
<< chipset.majorVersion;
if (chipset.majorVersion < 12) {
constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
// Left in place in case someone disables the inline ASM path or future
// chipsets use the same bit pattern.
constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);

int32_t ldsOnlyBits;
if (chipset.majorVersion == 11)
ldsOnlyBits = ldsOnlyBitsGfx11;
else if (chipset.majorVersion == 10)
ldsOnlyBits = ldsOnlyBitsGfx10;
else if (chipset.majorVersion <= 9)
ldsOnlyBits = ldsOnlyBitsGfx6789;
else
return op.emitOpError(
"don't know how to lower this for chipset major version")
<< chipset.majorVersion;

Location loc = op->getLoc();
rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
} else {
Location loc = op->getLoc();
rewriter.create<ROCDL::WaitDscntOp>(loc, 0);
rewriter.create<ROCDL::BarrierSignalOp>(loc, -1);
rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
}

Location loc = op->getLoc();
rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
return success();
}
};
Expand Down
4 changes: 4 additions & 0 deletions mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx90a | FileCheck %s --check-prefixes=CHECK,GFX9,GFX90A
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32
func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
Expand Down Expand Up @@ -246,6 +247,9 @@ func.func @lds_barrier() {
// GFX10-NEXT: rocdl.s.barrier
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
// GFX12: rocdl.s.wait.dscnt 0
// GFX12-NEXT: rocdl.s.barrier.signal -1
// GFX12-NEXT: rocdl.s.barrier.wait -1
amdgpu.lds_barrier
func.return
}
Expand Down
22 changes: 22 additions & 0 deletions mlir/test/Dialect/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,28 @@ llvm.func @rocdl.s.barrier() {
rocdl.s.barrier
llvm.return
}

// Parse/print test for `rocdl.s.barrier.signal` with an immediate id of -1:
// the expected output uses the same textual form as the input.
llvm.func @rocdl.s.barrier.signal() {
// CHECK-LABEL: rocdl.s.barrier.signal
// CHECK: rocdl.s.barrier.signal -1
rocdl.s.barrier.signal -1
llvm.return
}

// Parse/print test for `rocdl.s.barrier.wait` with an immediate id of -1:
// the expected output uses the same textual form as the input.
llvm.func @rocdl.s.barrier.wait() {
// CHECK-LABEL: rocdl.s.barrier.wait
// CHECK: rocdl.s.barrier.wait -1
rocdl.s.barrier.wait -1
llvm.return
}

// Parse/print test for `rocdl.s.wait.dscnt` with an immediate value of 0:
// the expected output uses the same textual form as the input.
llvm.func @rocdl.s.wait.dscnt() {
// CHECK-LABEL: rocdl.s.wait.dscnt
// CHECK: rocdl.s.wait.dscnt 0
rocdl.s.wait.dscnt 0
llvm.return
}

// -----

// expected-error@below {{attribute attached to unexpected op}}
Expand Down
21 changes: 21 additions & 0 deletions mlir/test/Target/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,27 @@ llvm.func @rocdl.barrier() {
llvm.return
}

// Translation test: `rocdl.s.barrier.signal -1` is expected to become a call
// to `@llvm.amdgcn.s.barrier.signal` with the id passed as an i32 constant.
llvm.func @rocdl.s.barrier.signal() {
// CHECK-LABEL: rocdl.s.barrier.signal
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
rocdl.s.barrier.signal -1
llvm.return
}

// Translation test: `rocdl.s.barrier.wait -1` is expected to become a call
// to `@llvm.amdgcn.s.barrier.wait` with the id passed as an i16 constant.
llvm.func @rocdl.s.barrier.wait() {
// CHECK-LABEL: rocdl.s.barrier.wait
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
rocdl.s.barrier.wait -1
llvm.return
}

// Translation test: `rocdl.s.wait.dscnt 0` is expected to become a call
// to `@llvm.amdgcn.s.wait.dscnt` with the value passed as an i16 constant.
llvm.func @rocdl.s.wait.dscnt() {
// CHECK-LABEL: rocdl.s.wait.dscnt
// CHECK-NEXT: call void @llvm.amdgcn.s.wait.dscnt(i16 0)
rocdl.s.wait.dscnt 0
llvm.return
}

llvm.func @rocdl.setprio() {
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
rocdl.s.setprio 0
Expand Down

0 comments on commit b014265

Please sign in to comment.