Skip to content

Commit

Permalink
[SYCL][HIP][CUDA] Use new version of piMemGetNativeHandle and add test (
Browse files Browse the repository at this point in the history
#12297)

We want to change the signature of `piMemGetNativeHandle` for reasons
explained here oneapi-src/unified-runtime#1199

Corresponding UR PR:
oneapi-src/unified-runtime#1226

A previous PR added a new entry point
#12199 but it was decided that it is
better to modify the existing entry point
  • Loading branch information
hdelan committed Feb 1, 2024
1 parent 4fdcb58 commit 8427bd2
Show file tree
Hide file tree
Showing 14 changed files with 188 additions and 31 deletions.
9 changes: 6 additions & 3 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,11 @@
// 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones.
// 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM
// 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
// 15.43 Changed the signature of piextMemGetNativeHandle to also take a
// pi_device

#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 42
#define _PI_H_VERSION_MAJOR 15
#define _PI_H_VERSION_MINOR 43

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -1424,8 +1426,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition(
/// Gets the native handle of a PI mem object.
///
/// \param mem is the PI mem to get the native handle of.
/// \param dev is the PI device that the native allocation will be resident on
/// \param nativeHandle is the native handle of mem.
__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem,
__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev,
pi_native_handle *nativeHandle);

/// Creates PI mem object from a native handle.
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/native_cpu/pi_native_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
# commit 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a
# Merge: edb281f3 5fc41099
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7
# Merge: 40517d2b fc1f3066
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Tue Jan 30 12:31:44 2024 +0000
# Merge pull request #1168 from Seanst98/sean/unique-addr-mode-per-dim-adapters
# [Bindless][CUDA] Unique addressing modes per dimension
set(UNIFIED_RUNTIME_TAG 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a)
# Date: Wed Jan 31 10:38:07 2024 +0000
# Merge pull request #1226 from hdelan/get-native-mem-on-device2
# [UR] Add extra param to urMemGetNativeHandle
set(UNIFIED_RUNTIME_TAG d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3081,13 +3081,14 @@ inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags,
return PI_SUCCESS;
}

inline pi_result piextMemGetNativeHandle(pi_mem Mem,
inline pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT);

ur_mem_handle_t UrMem = reinterpret_cast<ur_mem_handle_t>(Mem);
ur_device_handle_t UrDev = reinterpret_cast<ur_device_handle_t>(Dev);
ur_native_handle_t NativeMem{};
HANDLE_ERRORS(urMemGetNativeHandle(UrMem, &NativeMem));
HANDLE_ERRORS(urMemGetNativeHandle(UrMem, UrDev, &NativeMem));

*NativeHandle = reinterpret_cast<pi_native_handle>(NativeMem);

Expand Down
6 changes: 3 additions & 3 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition(
BufferCreateInfo, RetMem);
}

__SYCL_EXPORT pi_result
piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
__SYCL_EXPORT pi_result piextMemGetNativeHandle(
pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

__SYCL_EXPORT pi_result
Expand Down
6 changes: 5 additions & 1 deletion sycl/source/detail/buffer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ buffer_impl::getNativeVector(backend BackendName) const {
}

pi_native_handle Handle;
Plugin->call<PiApiKind::piextMemGetNativeHandle>(NativeMem, &Handle);
// When doing buffer interop we don't know what device the memory should be
// resident on, so pass nullptr for Device param. Buffer interop may not be
// supported by all backends.
Plugin->call<PiApiKind::piextMemGetNativeHandle>(NativeMem, /*Dev*/ nullptr,
&Handle);
Handles.push_back(Handle);
}

Expand Down
12 changes: 10 additions & 2 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,11 @@ void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx,
// Always use call_nocheck here, because call may throw an exception,
// and this lambda will be called from destructor, which in combination
// rewards us with UB.
Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(*RetMem, &Ptr);
// When doing buffer interop we don't know what device the memory should
// be resident on, so pass nullptr for Device param. Buffer interop may
// not be supported by all backends.
Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(
*RetMem, /*Dev*/ nullptr, &Ptr);
emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
CorrID);
}};
Expand All @@ -167,7 +171,11 @@ void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) {
// Do not make unnecessary PI calls without instrumentation enabled
if (xptiTraceEnabled()) {
pi_native_handle PtrHandle = 0;
Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, &PtrHandle);
// When doing buffer interop we don't know what device the memory should be
// resident on, so pass nullptr for Device param. Buffer interop may not be
// supported by all backends.
Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, /*Dev*/ nullptr,
&PtrHandle);
Ptr = (uintptr_t)(PtrHandle);
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/interop_handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ pi_native_handle interop_handle::getNativeMem(detail::Requirement *Req) const {

auto Plugin = MQueue->getPlugin();
pi_native_handle Handle;
Plugin->call<detail::PiApiKind::piextMemGetNativeHandle>(Iter->second,
&Handle);
Plugin->call<detail::PiApiKind::piextMemGetNativeHandle>(
Iter->second, MDevice->getHandleRef(), &Handle);
return Handle;
}

Expand Down
136 changes: 136 additions & 0 deletions sycl/test-e2e/HostInteropTask/interop-task-hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// FIXME: the rocm include path and link path are highly platform dependent,
// we should set this with some variable instead.
// RUN: %{build} -o %t.out -I/opt/rocm/include -L/opt/rocm/lib -lamdhip64
// RUN: %{run} %t.out
// REQUIRES: hip

#include <iostream>
#include <sycl/sycl.hpp>

#define __HIP_PLATFORM_AMD__

#include <hip/hip_runtime.h>

using namespace sycl;
using namespace sycl::access;

static constexpr size_t BUFFER_SIZE = 1024;

template <typename T> class Modifier;

template <typename T> class Init;

template <typename BufferT, typename ValueT>
void checkBufferValues(BufferT Buffer, ValueT Value) {
auto Acc = Buffer.get_host_access();
for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) {
if (Acc[Idx] != Value) {
std::cerr << "buffer[" << Idx << "] = " << Acc[Idx]
<< ", expected val = " << Value << '\n';
exit(1);
}
}
}

template <typename DataT>
void copy(buffer<DataT, 1> &Src, buffer<DataT, 1> &Dst, queue &Q) {
Q.submit([&](handler &CGH) {
auto SrcA = Src.template get_access<mode::read>(CGH);
auto DstA = Dst.template get_access<mode::write>(CGH);

auto Func = [=](interop_handle IH) {
auto HipStream = IH.get_native_queue<backend::ext_oneapi_hip>();
auto SrcMem = IH.get_native_mem<backend::ext_oneapi_hip>(SrcA);
auto DstMem = IH.get_native_mem<backend::ext_oneapi_hip>(DstA);

if (hipMemcpyWithStream(DstMem, SrcMem, sizeof(DataT) * SrcA.get_count(),
hipMemcpyDefault, HipStream) != hipSuccess) {
throw;
}

if (hipStreamSynchronize(HipStream) != hipSuccess) {
throw;
}

if (Q.get_backend() != IH.get_backend())
throw;
};
CGH.host_task(Func);
});
}

template <typename DataT> void modify(buffer<DataT, 1> &B, queue &Q) {
Q.submit([&](handler &CGH) {
auto Acc = B.template get_access<mode::read_write>(CGH);

auto Kernel = [=](item<1> Id) { Acc[Id] += 1; };

CGH.parallel_for<Modifier<DataT>>(Acc.get_count(), Kernel);
});
}

template <typename DataT, DataT B1Init, DataT B2Init>
void init(buffer<DataT, 1> &B1, buffer<DataT, 1> &B2, queue &Q) {
Q.submit([&](handler &CGH) {
auto Acc1 = B1.template get_access<mode::write>(CGH);
auto Acc2 = B2.template get_access<mode::write>(CGH);

CGH.parallel_for<Init<DataT>>(BUFFER_SIZE, [=](item<1> Id) {
Acc1[Id] = B1Init;
Acc2[Id] = B2Init;
});
});
}

// Check that a single host-interop-task with a buffer will work.
void test_ht_buffer(queue &Q) {
buffer<int, 1> Buffer{BUFFER_SIZE};

Q.submit([&](handler &CGH) {
auto Acc = Buffer.get_access<mode::write>(CGH);
auto Func = [=](interop_handle IH) { /*A no-op */ };
CGH.host_task(Func);
});
}

// A test that uses HIP interop to copy data from buffer A to buffer B, by
// getting HIP ptrs and calling the hipMemcpyWithStream. Then run a SYCL
// kernel that modifies the data in place for B, e.g. increment one, then copy
// back to buffer A. Run it on a loop, to ensure the dependencies and the
// reference counting of the objects is not leaked.
void test_ht_kernel_dependencies(queue &Q) {
static constexpr int COUNT = 4;
buffer<int, 1> Buffer1{BUFFER_SIZE};
buffer<int, 1> Buffer2{BUFFER_SIZE};

// Init the buffer with a'priori invalid data.
init<int, -1, -2>(Buffer1, Buffer2, Q);

// Repeat a couple of times.
for (size_t Idx = 0; Idx < COUNT; ++Idx) {
copy(Buffer1, Buffer2, Q);
modify(Buffer2, Q);
copy(Buffer2, Buffer1, Q);
}

checkBufferValues(Buffer1, COUNT - 1);
checkBufferValues(Buffer2, COUNT - 1);
}

void tests(queue &Q) {
test_ht_buffer(Q);
test_ht_kernel_dependencies(Q);
}

int main() {
queue Q([](sycl::exception_list ExceptionList) {
if (ExceptionList.size() != 1) {
std::cerr << "Should be one exception in exception list" << std::endl;
std::abort();
}
std::rethrow_exception(*ExceptionList.begin());
});
tests(Q);
std::cout << "Test PASSED" << std::endl;
return 0;
}
2 changes: 1 addition & 1 deletion sycl/unittests/helpers/PiMockPlugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ mock_piMemBufferPartition(pi_mem buffer, pi_mem_flags flags,
return PI_SUCCESS;
}

inline pi_result mock_piextMemGetNativeHandle(pi_mem mem,
inline pi_result mock_piextMemGetNativeHandle(pi_mem mem, pi_device dev,
pi_native_handle *nativeHandle) {
*nativeHandle = reinterpret_cast<pi_native_handle>(mem);
return PI_SUCCESS;
Expand Down

0 comments on commit 8427bd2

Please sign in to comment.