From b014265d992ce515826a8d3ce378ca1f643e297d Mon Sep 17 00:00:00 2001 From: Daniel Hernandez-Juarez Date: Sat, 21 Sep 2024 00:41:36 +0200 Subject: [PATCH] [mlir][AMDGPU] New gfx12 barrier instructions and update lowering LDSBarrierOp (#109273) New gfx12 barrier instructions: s.barrier.signal, s.barrier.wait and s.wait.dscnt. And update lowering LDSBarrierOp accordingly. CC: @krzysz00 @manupak @giuseros --- mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 25 +++++++++- .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 48 +++++++++++-------- .../AMDGPUToROCDL/amdgpu-to-rocdl.mlir | 4 ++ mlir/test/Dialect/LLVMIR/rocdl.mlir | 22 +++++++++ mlir/test/Target/LLVMIR/rocdl.mlir | 21 ++++++++ 5 files changed, 98 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index 523d719ae336fd..aae2cf88ded041 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -88,11 +88,12 @@ class ROCDL_IntrPure1Op : class ROCDL_IntrOp overloadedResults, list overloadedOperands, list traits, int numResults, - int requiresAccessGroup = 0, int requiresAliasAnalysis = 0> : + int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list immArgPositions = [], + list immArgAttrNames = []> : LLVM_IntrOpBase; + requiresAliasAnalysis, 0, immArgPositions, immArgAttrNames>; //===----------------------------------------------------------------------===// // ROCDL special register op definitions @@ -255,6 +256,26 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> { let assemblyFormat = "attr-dict"; } +def ROCDL_BarrierSignalOp : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>, + Arguments<(ins I32Attr:$id)> { + let results = (outs); + let assemblyFormat = "$id attr-dict"; +} + +def ROCDL_BarrierWaitOp : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0, 0, 0, [0], ["id"]>, + Arguments<(ins I16Attr:$id)> { + let results = (outs); + let assemblyFormat = "$id attr-dict"; + string llvmBuilder = + "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));"; +} + +def ROCDL_WaitDscntOp: ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>, + Arguments<(ins I16Attr:$id)> { + let results = (outs); + let assemblyFormat = "$id attr-dict"; +} + def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>, Arguments<(ins I16Attr:$priority)> { let results = (outs); diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index f80d2793eaef59..7112d1607dfdca 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -301,27 +301,35 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern { /*operand_attrs=*/ArrayAttr()); return success(); } - constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8); - constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8); - // Left in place in case someone disables the inline ASM path or future - // chipsets use the same bit pattern. - constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4); - - int32_t ldsOnlyBits; - if (chipset.majorVersion == 11) - ldsOnlyBits = ldsOnlyBitsGfx11; - else if (chipset.majorVersion == 10) - ldsOnlyBits = ldsOnlyBitsGfx10; - else if (chipset.majorVersion <= 9) - ldsOnlyBits = ldsOnlyBitsGfx6789; - else - return op.emitOpError( - "don't know how to lower this for chipset major version") - << chipset.majorVersion; + if (chipset.majorVersion < 12) { + constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8); + constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8); + // Left in place in case someone disables the inline ASM path or future + // chipsets use the same bit pattern. + constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4); + + int32_t ldsOnlyBits; + if (chipset.majorVersion == 11) + ldsOnlyBits = ldsOnlyBitsGfx11; + else if (chipset.majorVersion == 10) + ldsOnlyBits = ldsOnlyBitsGfx10; + else if (chipset.majorVersion <= 9) + ldsOnlyBits = ldsOnlyBitsGfx6789; + else + return op.emitOpError( + "don't know how to lower this for chipset major version") + << chipset.majorVersion; + + Location loc = op->getLoc(); + rewriter.create(loc, ldsOnlyBits); + rewriter.replaceOpWithNewOp(op); + } else { + Location loc = op->getLoc(); + rewriter.create(loc, 0); + rewriter.create(loc, -1); + rewriter.replaceOpWithNewOp(op, -1); + } - Location loc = op->getLoc(); - rewriter.create(loc, ldsOnlyBits); - rewriter.replaceOpWithNewOp(op); return success(); } }; diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir index 9f4db151043455..7fd5610a88913e 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir @@ -2,6 +2,7 @@ // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx90a | FileCheck %s --check-prefixes=CHECK,GFX9,GFX90A // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA +// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA // CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32 func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref) -> i32 { @@ -246,6 +247,9 @@ func.func @lds_barrier() { // GFX10-NEXT: rocdl.s.barrier // GFX11: llvm.inline_asm has_side_effects asm_dialect = att // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier" + // GFX12: rocdl.s.wait.dscnt 0 + // GFX12-NEXT: rocdl.s.barrier.signal -1 + // GFX12-NEXT: rocdl.s.barrier.wait -1 amdgpu.lds_barrier func.return } diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index f5dd5721c45e6f..397d66d92bc5d5 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -352,6 +352,28 @@ llvm.func @rocdl.s.barrier() { rocdl.s.barrier llvm.return } + +llvm.func @rocdl.s.barrier.signal() { + // CHECK-LABEL: rocdl.s.barrier.signal + // CHECK: rocdl.s.barrier.signal -1 + rocdl.s.barrier.signal -1 + llvm.return +} + +llvm.func @rocdl.s.barrier.wait() { + // CHECK-LABEL: rocdl.s.barrier.wait + // CHECK: rocdl.s.barrier.wait -1 + rocdl.s.barrier.wait -1 + llvm.return +} + +llvm.func @rocdl.s.wait.dscnt() { + // CHECK-LABEL: rocdl.s.wait.dscnt + // CHECK: rocdl.s.wait.dscnt 0 + rocdl.s.wait.dscnt 0 + llvm.return +} + // ----- // expected-error@below {{attribute attached to unexpected op}} diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 0f0c2412e5ec2a..08c2d4e6477970 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -142,6 +142,27 @@ llvm.func @rocdl.barrier() { llvm.return } +llvm.func @rocdl.s.barrier.signal() { + // CHECK-LABEL: rocdl.s.barrier.signal + // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1) + rocdl.s.barrier.signal -1 + llvm.return +} + +llvm.func @rocdl.s.barrier.wait() { + // CHECK-LABEL: rocdl.s.barrier.wait + // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1) + rocdl.s.barrier.wait -1 + llvm.return +} + +llvm.func @rocdl.s.wait.dscnt() { + // CHECK-LABEL: rocdl.s.wait.dscnt + // CHECK-NEXT: call void @llvm.amdgcn.s.wait.dscnt(i16 0) + rocdl.s.wait.dscnt 0 + llvm.return +} + llvm.func @rocdl.setprio() { // CHECK: call void @llvm.amdgcn.s.setprio(i16 0) rocdl.s.setprio 0