Skip to content

Commit

Permalink
[SPIR-V] Implement support of the SPV_INTEL_split_barrier SPIRV exten…
Browse files Browse the repository at this point in the history
  • Loading branch information
VyacheslavLevytskyy authored Oct 15, 2024
1 parent 8d8996d commit bfe84f7
Show file tree
Hide file tree
Showing 10 changed files with 412 additions and 6 deletions.
2 changes: 2 additions & 0 deletions llvm/docs/SPIRVUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
- Adds decorations that can be applied to global (module scope) variables to help code generation for FPGA devices.
* - ``SPV_INTEL_optnone``
- Adds OptNoneINTEL value for Function Control mask that indicates a request to not optimize the function.
* - ``SPV_INTEL_split_barrier``
- Adds SPIR-V instructions to split a control barrier into two separate operations: the first indicates that an invocation has "arrived" at the barrier but should continue executing, and the second indicates that an invocation should "wait" for other invocations to arrive at the barrier before executing further.
* - ``SPV_INTEL_subgroups``
- Allows work items in a subgroup to share data without the use of local memory and work group barriers, and to utilize specialized hardware to load and store blocks of data from images or buffers.
* - ``SPV_INTEL_usm_storage_classes``
Expand Down
27 changes: 21 additions & 6 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,18 @@ static bool buildAtomicFlagInst(const SPIRV::IncomingCall *Call,
static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
const auto *ST =
static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget());
if ((Opcode == SPIRV::OpControlBarrierArriveINTEL ||
Opcode == SPIRV::OpControlBarrierWaitINTEL) &&
!ST->canUseExtension(SPIRV::Extension::SPV_INTEL_split_barrier)) {
std::string DiagMsg = std::string(Builtin->Name) +
": the builtin requires the following SPIR-V "
"extension: SPV_INTEL_split_barrier";
report_fatal_error(DiagMsg.c_str(), false);
}

if (Call->isSpirvOp())
return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0));

Expand All @@ -896,13 +908,16 @@ static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
if (MemFlags & SPIRV::CLK_IMAGE_MEM_FENCE)
MemSemantics |= SPIRV::MemorySemantics::ImageMemory;

if (Opcode == SPIRV::OpMemoryBarrier) {
std::memory_order MemOrder =
static_cast<std::memory_order>(getIConstVal(Call->Arguments[1], MRI));
MemSemantics = getSPIRVMemSemantics(MemOrder) | MemSemantics;
} else {
if (Opcode == SPIRV::OpMemoryBarrier)
MemSemantics = getSPIRVMemSemantics(static_cast<std::memory_order>(
getIConstVal(Call->Arguments[1], MRI))) |
MemSemantics;
else if (Opcode == SPIRV::OpControlBarrierArriveINTEL)
MemSemantics |= SPIRV::MemorySemantics::Release;
else if (Opcode == SPIRV::OpControlBarrierWaitINTEL)
MemSemantics |= SPIRV::MemorySemantics::Acquire;
else
MemSemantics |= SPIRV::MemorySemantics::SequentiallyConsistent;
}

Register MemSemanticsReg =
MemFlags == MemSemantics
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.td
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,12 @@ defm : DemangledNativeBuiltin<"barrier", OpenCL_std, Barrier, 1, 3, OpControlBar
defm : DemangledNativeBuiltin<"work_group_barrier", OpenCL_std, Barrier, 1, 3, OpControlBarrier>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrier", OpenCL_std, Barrier, 3, 3, OpControlBarrier>;

// cl_intel_split_work_group_barrier
// Split work-group barrier builtins. Each record maps a demangled builtin
// name onto one of the two SPV_INTEL_split_barrier instructions: the
// "arrive" spellings lower to OpControlBarrierArriveINTEL and the "wait"
// spellings lower to OpControlBarrierWaitINTEL. Both the OpenCL C names
// (intel_work_group_barrier_*) and the direct __spirv_* wrapper names are
// registered.
// NOTE(review): the two integers appear to be the minimum and maximum
// accepted argument counts (1..2 for the OpenCL C forms, exactly 3 for the
// __spirv_ wrappers) -- confirm against the DemangledNativeBuiltin definition.
defm : DemangledNativeBuiltin<"intel_work_group_barrier_arrive", OpenCL_std, Barrier, 1, 2, OpControlBarrierArriveINTEL>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrierArriveINTEL", OpenCL_std, Barrier, 3, 3, OpControlBarrierArriveINTEL>;
defm : DemangledNativeBuiltin<"intel_work_group_barrier_wait", OpenCL_std, Barrier, 1, 2, OpControlBarrierWaitINTEL>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrierWaitINTEL", OpenCL_std, Barrier, 3, 3, OpControlBarrierWaitINTEL>;

// Kernel enqueue builtin records:
defm : DemangledNativeBuiltin<"__enqueue_kernel_basic", OpenCL_std, Enqueue, 5, 5, OpEnqueueKernel>;
defm : DemangledNativeBuiltin<"__enqueue_kernel_basic_events", OpenCL_std, Enqueue, 8, 8, OpEnqueueKernel>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ static const std::map<std::string, SPIRV::Extension::Extension>
{"SPV_INTEL_optnone", SPIRV::Extension::Extension::SPV_INTEL_optnone},
{"SPV_INTEL_usm_storage_classes",
SPIRV::Extension::Extension::SPV_INTEL_usm_storage_classes},
{"SPV_INTEL_split_barrier",
SPIRV::Extension::Extension::SPV_INTEL_split_barrier},
{"SPV_INTEL_subgroups",
SPIRV::Extension::Extension::SPV_INTEL_subgroups},
{"SPV_KHR_uniform_group_instructions",
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,12 @@ def OpNamedBarrierInitialize: UnOp<"OpNamedBarrierInitialize", 328>;
def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem),
"OpMemoryNamedBarrier $barr $mem $sem">;

// SPV_INTEL_split_barrier
// The two halves of a split control barrier. Neither instruction produces a
// result (empty outs list); each takes three ID operands printed in the order
// $exec $mem $sem.
// NOTE(review): by analogy with OpControlBarrier these are presumably the
// execution scope, memory scope and memory semantics -- confirm against the
// SPV_INTEL_split_barrier specification.

// "Arrive" half, defined with opcode value 6142.
def OpControlBarrierArriveINTEL: Op<6142, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
"OpControlBarrierArriveINTEL $exec $mem $sem">;
// "Wait" half, defined with opcode value 6143.
def OpControlBarrierWaitINTEL: Op<6143, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
"OpControlBarrierWaitINTEL $exec $mem $sem">;

// 3.42.21. Group and Subgroup Instructions

def OpGroupAsyncCopy: Op<259, (outs ID:$res), (ins TYPE:$ty, ID:$scope,
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_arithmetic_fence);
Reqs.addCapability(SPIRV::Capability::ArithmeticFenceEXT);
break;
case SPIRV::OpControlBarrierArriveINTEL:
case SPIRV::OpControlBarrierWaitINTEL:
if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_split_barrier)) {
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_split_barrier);
Reqs.addCapability(SPIRV::Capability::SplitBarrierINTEL);
}
break;
default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ defm GlobalVariableFPGADecorationsINTEL : CapabilityOperand<6189, 0, 0, [SPV_INT
defm CacheControlsINTEL : CapabilityOperand<6441, 0, 0, [SPV_INTEL_cache_controls], []>;
defm CooperativeMatrixKHR : CapabilityOperand<6022, 0, 0, [SPV_KHR_cooperative_matrix], []>;
defm ArithmeticFenceEXT : CapabilityOperand<6144, 0, 0, [SPV_EXT_arithmetic_fence], []>;
defm SplitBarrierINTEL : CapabilityOperand<6141, 0, 0, [SPV_INTEL_split_barrier], []>;

//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; Adapted from Khronos Translator test suite: test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/

;; kernel void test(global uint* dst)
;; {
;; intel_work_group_barrier_arrive(CLK_LOCAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_LOCAL_MEM_FENCE);
;; intel_work_group_barrier_arrive(CLK_GLOBAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_GLOBAL_MEM_FENCE);
;;
;; intel_work_group_barrier_arrive(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
;;}

; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR

; CHECK-ERROR: LLVM ERROR: intel_work_group_barrier_arrive: the builtin requires the following SPIR-V extension: SPV_INTEL_split_barrier

; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_split_barrier %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_split_barrier %s -o - -filetype=obj | spirv-val %}

; CHECK: Capability SplitBarrierINTEL
; CHECK: Extension "SPV_INTEL_split_barrier"
; CHECK: %[[#UINT:]] = OpTypeInt 32 0
;
; Scopes:
; CHECK-DAG: %[[#SCOPE_WORK_GROUP:]] = OpConstant %[[#UINT]] 2{{$}}
;
; Memory Semantics:
; 0x2 Acquire + 0x100 WorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_LOCAL:]] = OpConstant %[[#UINT]] 258
; 0x4 Release + 0x100 WorkgroupMemory
; CHECK-DAG: %[[#RELEASE_LOCAL:]] = OpConstant %[[#UINT]] 260
; 0x2 Acquire + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_GLOBAL:]] = OpConstant %[[#UINT]] 514
; 0x4 Release + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#RELEASE_GLOBAL:]] = OpConstant %[[#UINT]] 516
; 0x2 Acquire + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 770
; 0x4 Release + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#RELEASE_LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 772
;
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_LOCAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_LOCAL]]
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_GLOBAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_GLOBAL]]
;
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_LOCAL_GLOBAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_LOCAL_GLOBAL]]

; Kernel under test. It issues three arrive/wait pairs, one pair per fence-flag
; value used in the OpenCL C source quoted at the top of this file:
;   1 = CLK_LOCAL_MEM_FENCE, 2 = CLK_GLOBAL_MEM_FENCE, 3 = both flags OR-ed.
; The pointer argument is never referenced by the body (it is marked readnone).
; When the extension is not enabled, the expected diagnostic names
; intel_work_group_barrier_arrive.
; Function Attrs: convergent norecurse nounwind
define dso_local spir_kernel void @test(ptr addrspace(1) nocapture noundef readnone align 4 %0) local_unnamed_addr #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
; Pair 1: local fence (flag value 1); expected semantics constants 260 / 258.
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 1) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 1) #2
; Pair 2: global fence (flag value 2); expected semantics constants 516 / 514.
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 2) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 2) #2
; Pair 3: local + global fence (flag value 3); expected constants 772 / 770.
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 3) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 3) #2
ret void
}

; External declarations of the two OpenCL builtins called by @test. The names
; are Itanium-mangled: "_Z" + name length + name + "j" (one unsigned int
; parameter), i.e. intel_work_group_barrier_arrive(unsigned int) and
; intel_work_group_barrier_wait(unsigned int).

; Function Attrs: convergent
declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef) local_unnamed_addr #1

; Function Attrs: convergent
declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef) local_unnamed_addr #1

attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #2 = { convergent nounwind }

!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spir.version = !{!2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{i32 1, i32 2}
!3 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project 861386dbd6ff0d91636b7c674c2abb2eccd9d3f2)"}
!4 = !{i32 1}
!5 = !{!"none"}
!6 = !{!"uint*"}
!7 = !{!""}
Loading

0 comments on commit bfe84f7

Please sign in to comment.