Skip to content

Commit

Permalink
enforce uniform local ID for sub_group_broadcast
Browse files Browse the repository at this point in the history
The sub_group_broadcast specification requires that the local ID be uniform
and that the function be executed by all work-items in the sub-group;
otherwise the behavior is undefined. The compiler can take this into account
and emit fewer instructions. Previously the sub_group_broadcast implementation
was based on shuffle. This commit splits broadcast into a separate builtin
and marks the local ID argument as thread-uniform.
  • Loading branch information
pkwasnie-intel authored and igcbot committed Aug 1, 2023
1 parent e283575 commit 2c7b963
Show file tree
Hide file tree
Showing 15 changed files with 187 additions and 35 deletions.
27 changes: 17 additions & 10 deletions IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl
Original file line number Diff line number Diff line change
Expand Up @@ -537,17 +537,24 @@ void __builtin_IB_assume_uniform(void*);
// SubGroup Functions
int __builtin_IB_get_simd_size( void );
int __builtin_IB_get_simd_id( void );
uint __builtin_IB_simd_shuffle( uint, uint );
bool __builtin_IB_simd_shuffle_b(bool, uint);
uchar __builtin_IB_simd_shuffle_c( uchar, uint );
ushort __builtin_IB_simd_shuffle_us( ushort, uint );
float __builtin_IB_simd_shuffle_f( float, uint );
half __builtin_IB_simd_shuffle_h( half, uint );
double __builtin_IB_simd_shuffle_df(double, uint);
uint __builtin_IB_simd_shuffle_down( uint, uint, uint );
uint __builtin_IB_simd_shuffle( uint, uint );
bool __builtin_IB_simd_shuffle_b( bool, uint);
uchar __builtin_IB_simd_shuffle_c( uchar, uint );
ushort __builtin_IB_simd_shuffle_us( ushort, uint );
float __builtin_IB_simd_shuffle_f( float, uint );
half __builtin_IB_simd_shuffle_h( half, uint );
double __builtin_IB_simd_shuffle_df( double, uint);
uint __builtin_IB_simd_shuffle_down( uint, uint, uint );
ushort __builtin_IB_simd_shuffle_down_us( ushort, ushort, uint );
uchar __builtin_IB_simd_shuffle_down_uc( uchar, uchar, uint );
void __builtin_IB_sub_group_barrier();
uchar __builtin_IB_simd_shuffle_down_uc( uchar, uchar, uint );
// Sub-group broadcast builtins, one per value type. The suffix selects the
// value type: (none) = uint, _b = bool, _c = uchar, _us = ushort, _f = float,
// _h = half, _df = double.
// First argument: the value to broadcast. Second argument: the sub-group
// local ID of the source lane. Unlike the shuffle builtins, the local ID is
// required to be uniform across the sub-group, and the compiler treats it
// as thread-uniform.
uint __builtin_IB_simd_broadcast( uint, uint );
bool __builtin_IB_simd_broadcast_b( bool, uint );
uchar __builtin_IB_simd_broadcast_c( uchar, uint );
ushort __builtin_IB_simd_broadcast_us( ushort, uint );
float __builtin_IB_simd_broadcast_f( float, uint );
half __builtin_IB_simd_broadcast_h( half, uint );
double __builtin_IB_simd_broadcast_df( double, uint );
void __builtin_IB_sub_group_barrier();

// Block read : global address space
uint __builtin_IB_simd_block_read_1_global( const __global uint* );
Expand Down
32 changes: 16 additions & 16 deletions IGC/BiFModule/Implementation/group.cl
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i1_v3i32, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_b(Value, LocalId.s0);
return __builtin_IB_simd_broadcast_b(Value, LocalId.s0);
}
else
{
Expand All @@ -1124,7 +1124,7 @@ bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i1_v3i64, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_b(Value, (uint)LocalId.s0);
return __builtin_IB_simd_broadcast_b(Value, (uint)LocalId.s0);
}
else
{
Expand All @@ -1140,7 +1140,7 @@ char SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i8_v3i32, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_c(Value, LocalId.s0);
return __builtin_IB_simd_broadcast_c(Value, LocalId.s0);
}
else
{
Expand All @@ -1156,7 +1156,7 @@ char SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i8_v3i64, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_c(Value, (uint)LocalId.s0);
return __builtin_IB_simd_broadcast_c(Value, (uint)LocalId.s0);
}
else
{
Expand All @@ -1172,7 +1172,7 @@ short SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i16_v3i32, )(int Exe
}
else if (Execution == Subgroup)
{
return as_ushort(__builtin_IB_simd_shuffle_h(as_half(Value), LocalId.s0));
return as_ushort(__builtin_IB_simd_broadcast_h(as_half(Value), LocalId.s0));
}
else
{
Expand All @@ -1188,7 +1188,7 @@ short SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i16_v3i64, )(int Exe
}
else if (Execution == Subgroup)
{
return as_ushort(__builtin_IB_simd_shuffle_h(as_half(Value), (uint)LocalId.s0));
return as_ushort(__builtin_IB_simd_broadcast_h(as_half(Value), (uint)LocalId.s0));
}
else
{
Expand All @@ -1204,7 +1204,7 @@ int SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i32_v3i32, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle(Value, LocalId.s0);
return __builtin_IB_simd_broadcast(Value, LocalId.s0);
}
else
{
Expand All @@ -1220,7 +1220,7 @@ int SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i32_v3i64, )(int Execu
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle(Value, (uint)LocalId.s0);
return __builtin_IB_simd_broadcast(Value, (uint)LocalId.s0);
}
else
{
Expand All @@ -1236,7 +1236,7 @@ long SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i64_v3i32, )(int Exec
}
else if (Execution == Subgroup)
{
return ((((ulong)__builtin_IB_simd_shuffle(Value >> 32, LocalId.s0)) << 32 ) | __builtin_IB_simd_shuffle((uint)Value, LocalId.s0));
return ((((ulong)__builtin_IB_simd_broadcast(Value >> 32, LocalId.s0)) << 32 ) | __builtin_IB_simd_broadcast((uint)Value, LocalId.s0));
}
else
{
Expand All @@ -1252,7 +1252,7 @@ long SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_i64_v3i64, )(int Exec
}
else if (Execution == Subgroup)
{
return ((((ulong)__builtin_IB_simd_shuffle(Value >> 32, (uint)LocalId.s0)) << 32 ) | __builtin_IB_simd_shuffle((uint)Value, (uint)LocalId.s0));
return ((((ulong)__builtin_IB_simd_broadcast(Value >> 32, (uint)LocalId.s0)) << 32 ) | __builtin_IB_simd_broadcast((uint)Value, (uint)LocalId.s0));
}
else
{
Expand All @@ -1268,7 +1268,7 @@ half SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f16_v3i32, )(int Exec
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_h( Value, (uint)LocalId.s0 );
return __builtin_IB_simd_broadcast_h( Value, (uint)LocalId.s0 );
}
else
{
Expand All @@ -1284,7 +1284,7 @@ half SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f16_v3i64, )(int Exec
}
else if (Execution == Subgroup)
{
return as_half2(__builtin_IB_simd_shuffle( (uint)(as_short(Value)), (uint)LocalId.s0 )).x;
return as_half2(__builtin_IB_simd_broadcast( (uint)(as_short(Value)), (uint)LocalId.s0 )).x;
}
else
{
Expand All @@ -1300,7 +1300,7 @@ float SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f32_v3i32, )(int Exe
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_f( Value, LocalId.s0 );
return __builtin_IB_simd_broadcast_f( Value, LocalId.s0 );
}
else
{
Expand All @@ -1316,7 +1316,7 @@ float SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f32_v3i64, )(int Exe
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_f( Value, (uint)LocalId.s0 );
return __builtin_IB_simd_broadcast_f( Value, (uint)LocalId.s0 );
}
else
{
Expand All @@ -1334,7 +1334,7 @@ double SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f64_v3i32, )(int Ex
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_df( Value, LocalId.s0 );
return __builtin_IB_simd_broadcast_df( Value, LocalId.s0 );
}
else
{
Expand All @@ -1351,7 +1351,7 @@ double SPIRV_OVERLOADABLE SPIRV_BUILTIN(GroupBroadcast, _i32_f64_v3i64, )(int Ex
}
else if (Execution == Subgroup)
{
return __builtin_IB_simd_shuffle_df( Value, (uint) LocalId.s0 );
return __builtin_IB_simd_broadcast_df( Value, (uint) LocalId.s0 );
}
else
{
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ void CheckInstrTypes::visitCallInst(CallInst& C)
g_InstrTypes.hasDiscard = true;
break;
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
g_InstrTypes.mayHaveIndirectOperands = true;
g_InstrTypes.numWaveIntrinsics++;
break;
Expand Down
4 changes: 3 additions & 1 deletion IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8328,6 +8328,7 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
emitWaveInverseBallot(inst);
break;
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
emitSimdShuffle(inst);
break;
case GenISAIntrinsic::GenISA_WavePrefix:
Expand Down Expand Up @@ -8861,7 +8862,8 @@ bool EmitPass::waveShuffleCase(CVariable* Var, BasicBlock* BB, Instruction* I, b
if (GenIntrinsicInst* WaveShuffleIndexInst = dyn_cast<GenIntrinsicInst>(UI->getOperand(i)))
{
// if some of the payload come from waveShuffleIndex with indirect index, add the lifetimeStart.
if (WaveShuffleIndexInst->getIntrinsicID() == GenISAIntrinsic::GenISA_WaveShuffleIndex)
if (WaveShuffleIndexInst->getIntrinsicID() == GenISAIntrinsic::GenISA_WaveShuffleIndex ||
WaveShuffleIndexInst->getIntrinsicID() == GenISAIntrinsic::GenISA_WaveBroadcast)
{
CVariable* data = GetSymbol(WaveShuffleIndexInst->getOperand(0));
CVariable* simdChannel = GetSymbol(WaveShuffleIndexInst->getOperand(1));
Expand Down
10 changes: 7 additions & 3 deletions IGC/Compiler/CISACodeGen/PatternMatchPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,7 @@ namespace IGC
match = MatchIntegerSatModifier(I);
break;
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
match = MatchRegisterRegion(*GII) ||
MatchShuffleBroadCast(*GII) ||
MatchWaveShuffleIndex(*GII);
Expand Down Expand Up @@ -4590,7 +4591,8 @@ namespace IGC
// by WaveShuffleIndex intrinsic
GenIntrinsicInst* WSI = dyn_cast<GenIntrinsicInst>(elem);
if (!WSI ||
WSI->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveShuffleIndex)
(WSI->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveShuffleIndex &&
WSI->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveBroadcast))
{
WSVal = nullptr;
break;
Expand Down Expand Up @@ -4705,7 +4707,9 @@ namespace IGC
break;

llvm::GenIntrinsicInst* intrin = llvm::dyn_cast<llvm::GenIntrinsicInst>(temp);
if (!intrin || intrin->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveShuffleIndex)
if (!intrin ||
(intrin->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveShuffleIndex &&
intrin->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveBroadcast))
break;
waveInst[i] = temp;
}
Expand Down Expand Up @@ -5161,7 +5165,7 @@ namespace IGC
if (llvm::GenIntrinsicInst * intrin = llvm::dyn_cast<llvm::GenIntrinsicInst>(source))
{
GenISAIntrinsic::ID id = intrin->getIntrinsicID();
if (id == GenISAIntrinsic::GenISA_WaveShuffleIndex)
if (id == GenISAIntrinsic::GenISA_WaveShuffleIndex || id == GenISAIntrinsic::GenISA_WaveBroadcast)
{
if (llvm::ConstantInt * channelVal = llvm::dyn_cast<llvm::ConstantInt>(intrin->getOperand(1)))
{
Expand Down
6 changes: 5 additions & 1 deletion IGC/Compiler/CISACodeGen/PromoteInt8Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1111,13 +1111,15 @@ void PromoteInt8Type::promoteIntrinsic()
if (!GII)
continue;
if (GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveShuffleIndex) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveBroadcast) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_simdShuffleDown))
{
// Those are mov insts. Need to promote if its operand is
// of type I8 and index is not uniform.
Type* Ty = GII->getType();
Value* IndexOrDelta;
if (GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveShuffleIndex)) {
if (GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveShuffleIndex) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveBroadcast)) {
IndexOrDelta = GII->getArgOperand(1);
}
else {
Expand Down Expand Up @@ -1158,6 +1160,7 @@ void PromoteInt8Type::promoteIntrinsic()
gid == GenISAIntrinsic::GenISA_WavePrefix ||
gid == GenISAIntrinsic::GenISA_QuadPrefix ||
gid == GenISAIntrinsic::GenISA_WaveShuffleIndex ||
gid == GenISAIntrinsic::GenISA_WaveBroadcast ||
gid == GenISAIntrinsic::GenISA_simdShuffleDown)
{
//
Expand Down Expand Up @@ -1214,6 +1217,7 @@ void PromoteInt8Type::promoteIntrinsic()
}
case GenISAIntrinsic::GenISA_QuadPrefix:
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
case GenISAIntrinsic::GenISA_WaveAll:
{
// prototype:
Expand Down
22 changes: 21 additions & 1 deletion IGC/Compiler/CISACodeGen/WIAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,12 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
dep = WIAnalysis::UNIFORM_THREAD;
}

// Spec enforces subgroup broadcast to use thread-uniform local ID.
if (isWaveBroadcastIndex(inst))
{
dep = WIAnalysis::UNIFORM_THREAD;
}

// If the value was changed in this calculation
if (!hasOriginal || dep != orig)
{
Expand Down Expand Up @@ -901,6 +907,18 @@ bool WIAnalysisRunner::isRegionInvariant(const llvm::Instruction* defi, BranchIn
return true;
}

// Reports whether `inst` is consumed as operand 1 (the lane ID) by at least
// one GenISA_WaveBroadcast intrinsic call among its users.
bool WIAnalysisRunner::isWaveBroadcastIndex(const llvm::Instruction* inst)
{
    for (const auto* user : inst->users())
    {
        const GenIntrinsicInst* broadcast = dyn_cast<GenIntrinsicInst>(user);

        // Users that are not Gen intrinsics cannot be a broadcast.
        if (!broadcast)
            continue;

        if (broadcast->getIntrinsicID() != GenISAIntrinsic::GenISA_WaveBroadcast)
            continue;

        // Only a use in the index slot counts; a use as the broadcast value
        // (operand 0) does not.
        if (broadcast->getOperand(1) == inst)
            return true;
    }

    return false;
}

void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
{
IGC_ASSERT(hasDependency(inst));
Expand Down Expand Up @@ -1369,6 +1387,7 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
intrinsic_name == llvm_ldraw_indexed ||
intrinsic_name == llvm_cycleCounter ||
intrinsic_name == llvm_waveShuffleIndex ||
intrinsic_name == llvm_waveBroadcast ||
intrinsic_name == llvm_waveBallot ||
intrinsic_name == llvm_waveAll ||
intrinsic_name == llvm_waveClustered ||
Expand Down Expand Up @@ -1601,7 +1620,8 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
return WIAnalysis::UNIFORM_THREAD;
}

if (intrinsic_name == llvm_waveShuffleIndex)
if (intrinsic_name == llvm_waveShuffleIndex ||
intrinsic_name == llvm_waveBroadcast)
{
Value* op0 = inst->getArgOperand(0);
Value* op1 = inst->getArgOperand(1);
Expand Down
3 changes: 3 additions & 0 deletions IGC/Compiler/CISACodeGen/WIAnalysis.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,9 @@ namespace IGC
/// @brief return true if all the source operands are defined outside the region
bool isRegionInvariant(const llvm::Instruction* inst, BranchInfo* brInfo, unsigned level);

/// @brief return true if the instruction is used as the lane ID
///        (operand 1) of at least one subgroup broadcast (GenISA_WaveBroadcast)
bool isWaveBroadcastIndex(const llvm::Instruction* inst);

/// @brief update dependency structure for Alloca
bool TrackAllocaDep(const llvm::Value* I, AllocaDep& dep);

Expand Down
2 changes: 2 additions & 0 deletions IGC/Compiler/CISACodeGen/helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,7 @@ namespace IGC
switch (GII->getIntrinsicID())
{
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
case GenISAIntrinsic::GenISA_simdShuffleDown:
case GenISAIntrinsic::GenISA_simdShuffleXor:
case GenISAIntrinsic::GenISA_simdBlockRead:
Expand Down Expand Up @@ -1682,6 +1683,7 @@ namespace IGC
opcode == llvm_waveClustered ||
opcode == llvm_wavePrefix ||
opcode == llvm_waveShuffleIndex ||
opcode == llvm_waveBroadcast ||
opcode == llvm_waveBallot ||
opcode == llvm_simdShuffleDown ||
opcode == llvm_simdBlockRead||
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/opCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ DECLARE_OPCODE(GenISA_WaveClustered, GenISAIntrinsic, llvm_waveClustered, false,
DECLARE_OPCODE(GenISA_WavePrefix, GenISAIntrinsic, llvm_wavePrefix, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_QuadPrefix, GenISAIntrinsic, llvm_quadPrefix, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_WaveShuffleIndex, GenISAIntrinsic, llvm_waveShuffleIndex, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_WaveBroadcast, GenISAIntrinsic, llvm_waveBroadcast, false, false, false, false, false, false, false)

// Unmasked region
DECLARE_OPCODE(GenISA_UnmaskedRegionBegin, GenISAIntrinsic, llvm_unmaskedBegin, false, false, false, false, false, false, false)
Expand Down
Loading

0 comments on commit 2c7b963

Please sign in to comment.