Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDA][LIBCLC] Implement RC11 seq_cst for PTX6.0 #12516

Merged
merged 15 commits into from
Mar 18, 2024
Merged
7 changes: 7 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ Memory order is stored in the lowest 5 bits */
ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
8 changes: 8 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -120,6 +121,13 @@ Memory order is stored in the lowest 5 bits */ \
TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
16 changes: 16 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
} \
}

// Emits a sequentially consistent PTX fence whose width is chosen from the
// `scope` variable visible at the expansion site:
//   CrossDevice -> fence.sc.sys
//   Device      -> fence.sc.gpu
//   otherwise   -> fence.sc.cta
// The read-modify-write call sites shown alongside this macro only expand it
// under `__clc_nvvm_reflect_arch() >= 700` (presumably because fence.sc
// requires sm_70 -- confirm against the PTX ISA).
// Expands to a complete if/else statement, so callers add no trailing ';'.
#define __CLC_NVVM_FENCE_SC_SM70()                                             \
  if (scope == CrossDevice) {                                                  \
    __asm__ __volatile__("fence.sc.sys;");                                     \
  } else if (scope == Device) {                                                \
    __asm__ __volatile__("fence.sc.gpu;");                                     \
  } else {                                                                     \
    __asm__ __volatile__("fence.sc.cta;");                                     \
  }

#define __CLC_NVVM_ATOMIC_IMPL( \
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \
ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \
Expand Down Expand Up @@ -117,6 +126,13 @@ Memory order is stored in the lowest 5 bits */ \
OP, ADDR_SPACE, ADDR_SPACE_NV) \
} \
break; \
case SequentiallyConsistent: \
if (__clc_nvvm_reflect_arch() >= 700) { \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
break; \
} \
} \
__builtin_trap(); \
__builtin_unreachable(); \
Expand Down
7 changes: 7 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -53,6 +54,12 @@ Memory order is stored in the lowest 5 bits */ \
case Acquire: \
__CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
break; \
case SequentiallyConsistent: \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
break; \
} \
} else { \
TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \
Expand Down
8 changes: 8 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <atomic_helpers.h>
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

Expand Down Expand Up @@ -54,6 +55,13 @@ Memory order is stored in the lowest 5 bits */ \
__CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, \
_release) \
break; \
case SequentiallyConsistent: \
__CLC_NVVM_FENCE_SC_SM70() \
__CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
ADDR_SPACE, ADDR_SPACE_NV, \
_release) \
break; \
} \
} else { \
switch (order) { \
Expand Down
17 changes: 15 additions & 2 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,27 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
#<<<<<<< HEAD
ldrumm marked this conversation as resolved.
Show resolved Hide resolved
#set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
set(UNIFIED_RUNTIME_REPO "https://github.com/JackAKirk/unified-runtime.git")

# commit 51d7180c344bbc2f942533e5fc51b0b04871f8d5
# Merge: b66cf9b1 0e37380e
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Fri Jan 26 12:20:20 2024 +0000
# Merge pull request #1205 from ykhatav/ur_dependentload
# [UR] add dependent-load flag to exclude CWD from default search path …
set(UNIFIED_RUNTIME_TAG d0e7f0e90e934e8c2d87f3f48e1d9b009b821827)
#=======
# set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
# commit 73d85ef9f48ec4d1f213066a31ed7e4402b5499b
# Merge: e46dc359 7985d3ee
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Mon Jan 29 14:26:08 2024 +0000
# Merge pull request #1289 from nrspruit/fix_multiDevice
# [L0] Fix native kernel usage, multi device kernel pointer and WorkSize
set(UNIFIED_RUNTIME_TAG 73d85ef9f48ec4d1f213066a31ed7e4402b5499b)
# set(UNIFIED_RUNTIME_TAG 73d85ef9f48ec4d1f213066a31ed7e4402b5499b)
#>>>>>>> sycl

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
6 changes: 3 additions & 3 deletions sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

// UNSUPPORTED: hip || (windows && level_zero)

// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
// RUN: %{run} %t.out

#include <sycl/sycl.hpp>
Expand All @@ -41,8 +41,8 @@
#include "../common.hpp"
#include "atomic_fixt.hpp"

constexpr size_t numBlocks = 64;
constexpr size_t numThreads = 256;
constexpr size_t numBlocks = 1;
ldrumm marked this conversation as resolved.
Show resolved Hide resolved
constexpr size_t numThreads = 1;
constexpr size_t numData = 6;

template <typename T, typename AtomicType>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

// UNSUPPORTED: hip

// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
// RUN: %{run} %t.out

#include <iostream>
Expand Down
Loading