Skip to content

Commit

Permalink
Merge branch 'amd-staging' of ssh://gerrit-git.amd.com:29418/lightnin…
Browse files Browse the repository at this point in the history
…g/ec/llvm-project into amd-staging
  • Loading branch information
searlmc1 committed Apr 26, 2024
2 parents c37d45d + 066a0b2 commit bad8e09
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 80 deletions.
50 changes: 9 additions & 41 deletions amd/device-libs/ockl/src/dm.cl
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,14 @@ first(__global void * v)
return __builtin_astype(w2, __global void *);
}

REQUIRES_WAVE64
// Read val from one active lane whose predicate is one.
// If no lanes have the predicate set, return none
// This is like first, except that first may not have its predicate set
static uint
elect_uint_wave64(int pred, uint val, uint none) {
elect_uint(int pred, uint val, uint none)
{
// Pretend wave32 doesn't exist. The wave64 ballot works, and the high half
// will fold out as 0.
uint ret = none;

ulong mask = __builtin_amdgcn_ballot_w64(pred != 0);
Expand All @@ -308,49 +313,12 @@ elect_uint_wave64(int pred, uint val, uint none) {
return ret;
}

// Wave32 variant of elect_uint: ballot the predicate over the wave and, when
// the ballot is non-empty, read val from the lane whose index
// __ockl_ctz_u32 derives from the ballot mask. An empty ballot yields none.
REQUIRES_WAVE32
static uint
elect_uint_wave32(int pred, uint val, uint none) {
    uint ballot = __builtin_amdgcn_ballot_w32(pred != 0);

    // No lane reported a set predicate: fall back to the caller's default.
    if (ballot == 0U) {
        return none;
    }

    // ds_bpermute receives the selected lane index shifted left by 2.
    uint src_lane = __ockl_ctz_u32(ballot);
    return __builtin_amdgcn_ds_bpermute(src_lane << 2, val);
}

// Fetch val from a single active lane that has its predicate set.
// When the predicate is clear on every lane, the result is none.
// This resembles first, except that first may not have its predicate set.
// The work is delegated to the wave64 or wave32 helper according to
// __oclc_wavefrontsize64.
static uint
elect_uint(int pred, uint val, uint none)
{
    if (__oclc_wavefrontsize64) {
        return elect_uint_wave64(pred, val, none);
    }
    return elect_uint_wave32(pred, val, none);
}

// Wave64 vote count: population count of the 64-bit ballot mask built from b.
REQUIRES_WAVE64
static uint
votes_wave64(bool b)
{
    return __builtin_popcountl(__builtin_amdgcn_ballot_w64(b));
}

// Wave32 vote count: population count of the 32-bit ballot mask built from b.
REQUIRES_WAVE32
static uint
votes_wave32(bool b)
{
    return __builtin_popcount(__builtin_amdgcn_ballot_w32(b));
}

// Count the number of nonzero arguments across the wave
// NOTE(review): as written, the first return below always exits the function,
// so the ballot/popcount statements after it are unreachable. This text looks
// like both sides of a commit diff shown together (a size-dispatching body and
// a wave64-only body) -- confirm which one is current against the repository.
static uint
votes(bool b)
{
// Dispatches to the wave64 or wave32 helper based on __oclc_wavefrontsize64.
return __oclc_wavefrontsize64 ? votes_wave64(b) : votes_wave32(b);
// Unreachable in this rendering: population count of the 64-bit ballot of b.
ulong mask = __builtin_amdgcn_ballot_w64(b);
return __builtin_popcountl(mask);
}

// The kind of the smallest block that can hold sz bytes
Expand Down
43 changes: 4 additions & 39 deletions amd/device-libs/ockl/src/wfaas.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,60 +22,25 @@ static int optimizationBarrierHack(int in_val)
return out_val;
}

REQUIRES_WAVE32
static bool wfany_impl_w32(int e) {
return __builtin_amdgcn_ballot_w32(e) != 0;
}

REQUIRES_WAVE64
static bool wfany_impl_w64(int e) {
return __builtin_amdgcn_ballot_w64(e) != 0;
}

// Wave-wide "any" test on e. The argument is first passed through
// optimizationBarrierHack, then handed to the wave64 or wave32 helper
// depending on __oclc_wavefrontsize64.
ATTR bool
OCKL_MANGLE_I32(wfany)(int e)
{
    int guarded = optimizationBarrierHack(e);

    if (__oclc_wavefrontsize64) {
        return wfany_impl_w64(guarded);
    }
    return wfany_impl_w32(guarded);
}

REQUIRES_WAVE32
static bool wfall_impl_w32(int e) {
return __builtin_amdgcn_ballot_w32(e) == __builtin_amdgcn_read_exec_lo();
}

// Wave64 "all": compares the 64-bit ballot mask built from e with the value
// returned by __builtin_amdgcn_read_exec().
// NOTE(review): the second return below is unreachable as written. It looks
// like a line from the other side of a commit diff that was rendered inside
// this function -- confirm against the repository.
REQUIRES_WAVE64
static bool wfall_impl_w64(int e) {
return __builtin_amdgcn_ballot_w64(e) == __builtin_amdgcn_read_exec();
return __builtin_amdgcn_ballot_w64(e) != 0;
}

// Wave-wide "all" test on e. The argument is first passed through
// optimizationBarrierHack, then handed to the wave64 or wave32 helper
// depending on __oclc_wavefrontsize64.
ATTR bool
OCKL_MANGLE_I32(wfall)(int e)
{
    int guarded = optimizationBarrierHack(e);

    if (__oclc_wavefrontsize64) {
        return wfall_impl_w64(guarded);
    }
    return wfall_impl_w32(guarded);
}


// Wave32 "same": true when the 32-bit ballot mask built from e is either zero
// or equal to the value returned by __builtin_amdgcn_read_exec_lo().
REQUIRES_WAVE32
static bool wfsame_impl_w32(int e) {
    uint ballot = __builtin_amdgcn_ballot_w32(e);
    uint exec_lo = __builtin_amdgcn_read_exec_lo();
    bool none_set = (ballot == 0);
    bool all_set = (ballot == exec_lo);
    // Bitwise OR, as both comparisons are always evaluated.
    return none_set | all_set;
}

// Wave64 "same": true when the 64-bit ballot mask built from e is either zero
// or equal to the value returned by __builtin_amdgcn_read_exec().
// NOTE(review): the second return below is unreachable as written. It looks
// like a line from the other side of a commit diff that was rendered inside
// this function -- confirm against the repository.
REQUIRES_WAVE64
static bool wfsame_impl_w64(int e) {
ulong u = __builtin_amdgcn_ballot_w64(e);
return (u == 0UL) | (u == __builtin_amdgcn_read_exec());
return __builtin_amdgcn_ballot_w64(e) == __builtin_amdgcn_read_exec();
}

// Wave-wide "same" test on e: the argument is passed through
// optimizationBarrierHack before the vote is taken.
// NOTE(review): as written, the first return below always exits the function,
// so the ballot statements after it are unreachable. This text looks like
// both sides of a commit diff shown together (a size-dispatching body and a
// wave64-only body) -- confirm which one is current against the repository.
ATTR bool
OCKL_MANGLE_I32(wfsame)(int e)
{
e = optimizationBarrierHack(e);
// Dispatches to the wave64 or wave32 helper based on __oclc_wavefrontsize64.
return __oclc_wavefrontsize64 ?
wfsame_impl_w64(e) : wfsame_impl_w32(e);
// Unreachable in this rendering: true when the 64-bit ballot of e is zero or
// equals the value returned by __builtin_amdgcn_read_exec().
ulong u = __builtin_amdgcn_ballot_w64(e);
return (u == 0UL) | (u == __builtin_amdgcn_read_exec());
}

0 comments on commit bad8e09

Please sign in to comment.