Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][HIP][CUDA] Use new version of piMemGetNativeHandle and add test #12297

Merged
merged 9 commits into from
Feb 1, 2024
9 changes: 6 additions & 3 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,11 @@
// 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones.
// 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM
// 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
// 15.43 Changed the signature of piextMemGetNativeHandle to also take a
// pi_device

#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 42
#define _PI_H_VERSION_MAJOR 15
#define _PI_H_VERSION_MINOR 43

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -1424,8 +1426,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition(
/// Gets the native handle of a PI mem object.
///
/// \param mem is the PI mem to get the native handle of.
/// \param dev is the PI device that the native allocation will be resident on
/// \param nativeHandle is the native handle of mem.
__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem,
hdelan marked this conversation as resolved.
Show resolved Hide resolved
__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev,
pi_native_handle *nativeHandle);

/// Creates PI mem object from a native handle.
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/native_cpu/pi_native_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags,
HostPtr, RetImage);
}

pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
# commit 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a
# Merge: edb281f3 5fc41099
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7
# Merge: 40517d2b fc1f3066
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Tue Jan 30 12:31:44 2024 +0000
# Merge pull request #1168 from Seanst98/sean/unique-addr-mode-per-dim-adapters
# [Bindless][CUDA] Unique addressing modes per dimension
set(UNIFIED_RUNTIME_TAG 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a)
# Date: Wed Jan 31 10:38:07 2024 +0000
# Merge pull request #1226 from hdelan/get-native-mem-on-device2
# [UR] Add extra param to urMemGetNativeHandle
set(UNIFIED_RUNTIME_TAG d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3081,13 +3081,14 @@ inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags,
return PI_SUCCESS;
}

inline pi_result piextMemGetNativeHandle(pi_mem Mem,
inline pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev,
pi_native_handle *NativeHandle) {
PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT);

ur_mem_handle_t UrMem = reinterpret_cast<ur_mem_handle_t>(Mem);
ur_device_handle_t UrDev = reinterpret_cast<ur_device_handle_t>(Dev);
ur_native_handle_t NativeMem{};
HANDLE_ERRORS(urMemGetNativeHandle(UrMem, &NativeMem));
HANDLE_ERRORS(urMemGetNativeHandle(UrMem, UrDev, &NativeMem));

*NativeHandle = reinterpret_cast<pi_native_handle>(NativeMem);

Expand Down
6 changes: 3 additions & 3 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition(
BufferCreateInfo, RetMem);
}

__SYCL_EXPORT pi_result
piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle);
__SYCL_EXPORT pi_result piextMemGetNativeHandle(
pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) {
return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle);
}

__SYCL_EXPORT pi_result
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/buffer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ buffer_impl::getNativeVector(backend BackendName) const {
}

pi_native_handle Handle;
Plugin->call<PiApiKind::piextMemGetNativeHandle>(NativeMem, &Handle);
Plugin->call<PiApiKind::piextMemGetNativeHandle>(NativeMem, nullptr,
hdelan marked this conversation as resolved.
Show resolved Hide resolved
&Handle);
Handles.push_back(Handle);
}

Expand Down
5 changes: 3 additions & 2 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx,
// Always use call_nocheck here, because call may throw an exception,
// and this lambda will be called from destructor, which in combination
// rewards us with UB.
Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(*RetMem, &Ptr);
Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(*RetMem, nullptr,
hdelan marked this conversation as resolved.
Show resolved Hide resolved
&Ptr);
emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
CorrID);
}};
Expand All @@ -167,7 +168,7 @@ void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) {
// Do not make unnecessary PI calls without instrumentation enabled
if (xptiTraceEnabled()) {
pi_native_handle PtrHandle = 0;
Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, &PtrHandle);
Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, nullptr, &PtrHandle);
hdelan marked this conversation as resolved.
Show resolved Hide resolved
Ptr = (uintptr_t)(PtrHandle);
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/interop_handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ pi_native_handle interop_handle::getNativeMem(detail::Requirement *Req) const {

auto Plugin = MQueue->getPlugin();
pi_native_handle Handle;
Plugin->call<detail::PiApiKind::piextMemGetNativeHandle>(Iter->second,
&Handle);
Plugin->call<detail::PiApiKind::piextMemGetNativeHandle>(
Iter->second, MDevice->getHandleRef(), &Handle);
return Handle;
}

Expand Down
175 changes: 175 additions & 0 deletions sycl/test-e2e/HostInteropTask/interop-task-hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// FIXME: the rocm include path and link path are highly platform dependent,
// we should set this with some variable instead.
// RUN: %{build} -o %t.out -I/opt/rocm/include -L/opt/rocm/lib -lamdhip64
// RUN: %{run} %t.out
// REQUIRES: hip

#include <iostream>
#include <sycl/sycl.hpp>

#define __HIP_PLATFORM_AMD__
hdelan marked this conversation as resolved.
Show resolved Hide resolved

#include <hip/hip_runtime.h>

using namespace sycl;
using namespace sycl::access;

static constexpr size_t BUFFER_SIZE = 1024;

template <typename T> class Modifier;

template <typename T> class Init;

template <typename BufferT, typename ValueT>
void checkBufferValues(BufferT Buffer, ValueT Value) {
auto Acc = Buffer.get_host_access();
for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) {
if (Acc[Idx] != Value) {
std::cerr << "buffer[" << Idx << "] = " << Acc[Idx]
<< ", expected val = " << Value << std::endl;
hdelan marked this conversation as resolved.
Show resolved Hide resolved
assert(0 && "Invalid data in the buffer");
hdelan marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

template <typename DataT>
void copy(buffer<DataT, 1> &Src, buffer<DataT, 1> &Dst, queue &Q) {
Q.submit([&](handler &CGH) {
auto SrcA = Src.template get_access<mode::read>(CGH);
auto DstA = Dst.template get_access<mode::write>(CGH);

auto Func = [=](interop_handle IH) {
auto HipStream = IH.get_native_queue<backend::ext_oneapi_hip>();
auto SrcMem = IH.get_native_mem<backend::ext_oneapi_hip>(SrcA);
auto DstMem = IH.get_native_mem<backend::ext_oneapi_hip>(DstA);

if (hipMemcpyWithStream(DstMem, SrcMem, sizeof(DataT) * SrcA.get_count(),
hipMemcpyDefault, HipStream) != hipSuccess) {
throw;
}

if (hipStreamSynchronize(HipStream) != hipSuccess) {
throw;
}

if (Q.get_backend() != IH.get_backend())
throw;
};
CGH.host_task(Func);
});
}

template <typename DataT> void modify(buffer<DataT, 1> &B, queue &Q) {
Q.submit([&](handler &CGH) {
auto Acc = B.template get_access<mode::read_write>(CGH);

auto Kernel = [=](item<1> Id) { Acc[Id] += 1; };

CGH.parallel_for<Modifier<DataT>>(Acc.get_count(), Kernel);
});
}

template <typename DataT, DataT B1Init, DataT B2Init>
void init(buffer<DataT, 1> &B1, buffer<DataT, 1> &B2, queue &Q) {
Q.submit([&](handler &CGH) {
auto Acc1 = B1.template get_access<mode::write>(CGH);
auto Acc2 = B2.template get_access<mode::write>(CGH);

CGH.parallel_for<Init<DataT>>(BUFFER_SIZE, [=](item<1> Id) {
Acc1[Id] = B1Init;
Acc2[Id] = B2Init;
});
});
}

// A test that uses HIP interop to copy data from buffer A to buffer B, by
// getting HIP ptrs and calling the hipMemcpyWithStream. Then run a SYCL
// kernel that modifies the data in place for B, e.g. increment one, then copy
// back to buffer A. Run it on a loop, to ensure the dependencies and the
// reference counting of the objects is not leaked.
void test1(queue &Q) {
static constexpr int COUNT = 4;
buffer<int, 1> Buffer1{BUFFER_SIZE};
buffer<int, 1> Buffer2{BUFFER_SIZE};

// Init the buffer with a'priori invalid data.
init<int, -1, -2>(Buffer1, Buffer2, Q);

// Repeat a couple of times.
for (size_t Idx = 0; Idx < COUNT; ++Idx) {
copy(Buffer1, Buffer2, Q);
modify(Buffer2, Q);
copy(Buffer2, Buffer1, Q);
}

checkBufferValues(Buffer1, COUNT - 1);
checkBufferValues(Buffer2, COUNT - 1);
}

// Same as above, but performing each command group on a separate SYCL queue
// (on the same or different devices). This ensures the dependency tracking
// works well but also there is no accidental side effects on other queues.
void test2(queue &Q) {
static constexpr int COUNT = 4;
buffer<int, 1> Buffer1{BUFFER_SIZE};
buffer<int, 1> Buffer2{BUFFER_SIZE};

// Init the buffer with a'priori invalid data.
init<int, -1, -2>(Buffer1, Buffer2, Q);

// Repeat a couple of times.
for (size_t Idx = 0; Idx < COUNT; ++Idx) {
copy(Buffer1, Buffer2, Q);
modify(Buffer2, Q);
copy(Buffer2, Buffer1, Q);
}
checkBufferValues(Buffer1, COUNT - 1);
checkBufferValues(Buffer2, COUNT - 1);
}

// Check that a single host-interop-task with a buffer will work.
void test3(queue &Q) {
buffer<int, 1> Buffer{BUFFER_SIZE};

Q.submit([&](handler &CGH) {
auto Acc = Buffer.get_access<mode::write>(CGH);
auto Func = [=](interop_handle IH) { /*A no-op */ };
CGH.host_task(Func);
});
}

void test4(queue &Q) {
ldrumm marked this conversation as resolved.
Show resolved Hide resolved
buffer<int, 1> Buffer1{BUFFER_SIZE};
buffer<int, 1> Buffer2{BUFFER_SIZE};

Q.submit([&](handler &CGH) {
auto Acc = Buffer1.template get_access<mode::write>(CGH);

auto Kernel = [=](item<1> Id) { Acc[Id] = 123; };
CGH.parallel_for<class Test5Init>(Acc.get_count(), Kernel);
});

copy(Buffer1, Buffer2, Q);

checkBufferValues(Buffer2, static_cast<int>(123));
}

void tests(queue &Q) {
test1(Q);
test2(Q);
test3(Q);
test4(Q);
}

int main() {
queue Q([](sycl::exception_list ExceptionList) {
if (ExceptionList.size() != 1) {
std::cerr << "Should be one exception in exception list" << std::endl;
std::abort();
}
std::rethrow_exception(*ExceptionList.begin());
});
tests(Q);
std::cout << "Test PASSED" << std::endl;
hdelan marked this conversation as resolved.
Show resolved Hide resolved
return 0;
}
2 changes: 1 addition & 1 deletion sycl/unittests/helpers/PiMockPlugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ mock_piMemBufferPartition(pi_mem buffer, pi_mem_flags flags,
return PI_SUCCESS;
}

inline pi_result mock_piextMemGetNativeHandle(pi_mem mem,
inline pi_result mock_piextMemGetNativeHandle(pi_mem mem, pi_device dev,
pi_native_handle *nativeHandle) {
*nativeHandle = reinterpret_cast<pi_native_handle>(mem);
return PI_SUCCESS;
Expand Down
Loading