From 3a22539be0283a8157533f1eef0e94786bec3cc3 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 14 Nov 2023 09:38:51 +0000 Subject: [PATCH] Fix race in cmpxchg and change xor to use CAS instead of atomic builtin --- .../libspirv/atomic/atomic_cmpxchg.cl | 19 ++++++++++------ .../libspirv/atomic/atomic_helpers.h | 22 +++++++++++++++++++ .../libspirv/atomic/atomic_xor.cl | 11 ++++++---- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl index 0eec08b8f90b1..a5c42c2f4e674 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl @@ -23,12 +23,17 @@ memory_order_success) \ GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ memory_order_failure) \ - TYPE original_val = *p; \ - bool success = __hip_atomic_compare_exchange_strong( \ - p, &expected, desired, memory_order_success, memory_order_failure, \ - atomic_scope); \ - \ - return success ? original_val : *p; \ + __hip_atomic_compare_exchange_strong(p, &expected, desired, \ + memory_order_success, \ + memory_order_failure, atomic_scope); \ + /* If cmpxchg \ + * succeeds: \ + - `expected` is unchanged, holding the old val that was at `p` \ + - `p` is changed to hold `desired` \ + * fails: \ + - `expected` is changed to hold the current val at `p` \ + - `p` is unchanged*/ \ + return expected; \ } #define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \ @@ -37,7 +42,7 @@ AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4) AMDGPU_ATOMIC_CMPXCHG(int, i) -AMDGPU_ATOMIC_CMPXCHG(unsigned int, j) +AMDGPU_ATOMIC_CMPXCHG(unsigned, j) AMDGPU_ATOMIC_CMPXCHG(long, l) AMDGPU_ATOMIC_CMPXCHG(unsigned long, m) AMDGPU_ATOMIC_CMPXCHG(float, f) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h index 8d21737889f3b..ea4e90c0ae3aa 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h @@ -71,3 +71,25 @@ AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \ AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \ AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) + +#define AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ + SUB1, OP) \ + _CLC_DEF TYPE \ + FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile AS TYPE *p, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \ + TYPE newval = 0; \ + do { \ + newval = oldval OP val; \ + } while (!__hip_atomic_compare_exchange_strong( \ + p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \ + return oldval; \ + } + +#define AMDGPU_CAS_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP) \ + AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP) \ + AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP) \ + AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP) diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl index b5aff2b49db62..869164f16f55b 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl @@ -10,11 +10,14 @@ #include #include -AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor) -AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor) -AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor) -AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor) +#define __CLC_XOR ^ +AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, int, i, __CLC_XOR) +AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __CLC_XOR) +AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, long, l, __CLC_XOR) +AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __CLC_XOR) + +#undef __CLC_XOR #undef AMDGPU_ATOMIC #undef AMDGPU_ATOMIC_IMPL #undef AMDGPU_ARCH_GEQ