Skip to content

Commit

Permalink
[HIP][libclc] Fix race in cmpxchg and change xor to use CAS instead o…
Browse files Browse the repository at this point in the history
…f atomic builtin (#11876)

1. Fix a race condition in cmpxchg
2. Change `atomic_xor` to use a CAS loop instead of atomic builtin. This
is needed to merge this in UR
oneapi-src/unified-runtime#936 so that perf
regression can be fixed. The long term fix is to use a compiler flag to
choose between builtin and safe CAS implementation, but talks upstream
may take some time to figure out the details. See
llvm/llvm-project#69229
  • Loading branch information
hdelan authored Nov 15, 2023
1 parent 0b6b3b8 commit c1ef658
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 11 deletions.
19 changes: 12 additions & 7 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@
memory_order_success) \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \
memory_order_failure) \
TYPE original_val = *p; \
bool success = __hip_atomic_compare_exchange_strong( \
p, &expected, desired, memory_order_success, memory_order_failure, \
atomic_scope); \
\
return success ? original_val : *p; \
__hip_atomic_compare_exchange_strong(p, &expected, desired, \
memory_order_success, \
memory_order_failure, atomic_scope); \
/* If cmpxchg \
* succeeds: \
- `expected` is unchanged, holding the old val that was at `p` \
- `p` is changed to hold `desired` \
* fails: \
- `expected` is changed to hold the current val at `p` \
- `p` is unchanged*/ \
return expected; \
}

#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
Expand All @@ -37,7 +42,7 @@
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)

AMDGPU_ATOMIC_CMPXCHG(int, i)
AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
AMDGPU_ATOMIC_CMPXCHG(unsigned, j)
AMDGPU_ATOMIC_CMPXCHG(long, l)
AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
AMDGPU_ATOMIC_CMPXCHG(float, f)
Expand Down
22 changes: 22 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,25 @@
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)

#define AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
SUB1, OP) \
_CLC_DEF TYPE \
FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
volatile AS TYPE *p, enum Scope scope, \
enum MemorySemanticsMask semantics, TYPE val) { \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \
TYPE newval = 0; \
do { \
newval = oldval OP val; \
} while (!__hip_atomic_compare_exchange_strong( \
p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \
return oldval; \
}

#define AMDGPU_CAS_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP) \
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP) \
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP) \
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP)
11 changes: 7 additions & 4 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor)
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor)
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor)
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)
#define __CLC_XOR ^

AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, int, i, __CLC_XOR)
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __CLC_XOR)
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, long, l, __CLC_XOR)
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __CLC_XOR)

#undef __CLC_XOR
#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
Expand Down

0 comments on commit c1ef658

Please sign in to comment.