Skip to content

Commit

Permalink
Merge pull request #417 from turboderp/dev
Browse files Browse the repository at this point in the history
Fix ROCm compile
  • Loading branch information
turboderp authored Apr 19, 2024
2 parents ed118b4 + 8c83a02 commit ad8691c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions exllamav2/exllamav2_ext/cuda/cache.cu
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ __global__ void fp16_to_q4_kv_kernel

for (int i = 1; i < 32; i <<= 1)
{
- half2 pw2 = __shfl_xor_sync(0xffffffff, w2, i, 32);
+ half2 pw2 = __shfl_xor_sync(0xffffffff, w2, i);
uint32_t* w2i = reinterpret_cast<uint32_t*>(&w2);
int32_t sfm = -static_cast<int32_t>(t & i) >> 31;
*w2i ^= (sfm & 0x80008000);
Expand Down Expand Up @@ -279,7 +279,7 @@ __global__ void q4_to_fp16_kv_kernel

for (int i = 1; i < 32; i <<= 1)
{
- half2 pw2 = __shfl_xor_sync(0xffffffff, w2, i, 32);
+ half2 pw2 = __shfl_xor_sync(0xffffffff, w2, i);
uint32_t* w2i = reinterpret_cast<uint32_t*>(&w2);
int32_t sfm = -static_cast<int32_t>(t & i) >> 31;
*w2i ^= (sfm & 0x80008000);
Expand Down

0 comments on commit ad8691c

Please sign in to comment.