From 15a2787f9ba89ca51272a3ea9fcb632654b9b3d3 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 26 Apr 2024 11:23:07 +0100 Subject: [PATCH 1/4] Get device from queue, not event If an event was constructed using some interop method it doesn't have an associated queue. Implicit migration of buffers was working on the assumption that each event has an associated device, which can be obtained from the queue. This patch removes these assumptions. --- source/adapters/hip/enqueue.cpp | 21 +++++++++++---------- source/adapters/hip/event.hpp | 2 -- source/adapters/hip/memory.cpp | 20 ++++++++------------ source/adapters/hip/memory.hpp | 15 ++++++++++++--- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index e6e3dd73fa..1b79814a5f 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -20,7 +20,7 @@ extern size_t imageElementByteSize(hipArray_Format ArrayFormat); -ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream, +ur_result_t enqueueEventsWait(ur_queue_handle_t Queue, hipStream_t Stream, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { if (!EventWaitList) { @@ -29,8 +29,8 @@ ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream, try { auto Result = forLatestEvents( EventWaitList, NumEventsInWaitList, - [Stream](ur_event_handle_t Event) -> ur_result_t { - ScopedContext Active(Event->getDevice()); + [Stream, Queue](ur_event_handle_t Event) -> ur_result_t { + ScopedContext Active(Queue->getDevice()); if (Event->isCompleted() || Event->getStream() == Stream) { return UR_RESULT_SUCCESS; } else { @@ -218,8 +218,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hBuffer->LastEventWritingToMemObj && - hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { - Device = hBuffer->LastEventWritingToMemObj->getDevice(); + hBuffer->LastDeviceWritingToMemObj != hQueue->getDevice()) { + Device = hBuffer->LastDeviceWritingToMemObj; ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -367,7 +367,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // if it has been written to if (phEvent && (MemArg.AccessFlags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY))) { - MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get()); + MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get(), + hQueue->getDevice()); } } // We can release the MemoryMigrationMutexes now @@ -584,8 +585,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hBuffer->LastEventWritingToMemObj && - hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { - Device = hBuffer->LastEventWritingToMemObj->getDevice(); + hBuffer->LastDeviceWritingToMemObj != hQueue->getDevice()) { + Device = hBuffer->LastDeviceWritingToMemObj; ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -1017,8 +1018,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hImage->LastEventWritingToMemObj && - hImage->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { - Device = hImage->LastEventWritingToMemObj->getDevice(); + hImage->LastDeviceWritingToMemObj != hQueue->getDevice()) { + Device = hImage->LastDeviceWritingToMemObj; ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index 64e8b2d9c8..5e7c1d7e7d 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -28,8 +28,6 @@ struct ur_event_handle_t_ { ur_queue_handle_t getQueue() const noexcept { return Queue; } - ur_device_handle_t getDevice() const noexcept { return Queue->getDevice(); } - hipStream_t getStream() const noexcept { return Stream; } uint32_t getComputeStreamToken() const noexcept { return StreamToken; } diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index dcc3e34fad..fc899070a2 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -525,11 +525,10 @@ inline ur_result_t migrateBufferToDevice(ur_mem_handle_t Mem, UR_CHECK_ERROR( hipMemcpyHtoD(Buffer.getPtr(hDevice), Buffer.HostPtr, Buffer.Size)); } - } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { - UR_CHECK_ERROR( - hipMemcpyDtoD(Buffer.getPtr(hDevice), - Buffer.getPtr(Mem->LastEventWritingToMemObj->getDevice()), - Buffer.Size)); + } else if (Mem->LastDeviceWritingToMemObj != hDevice) { + UR_CHECK_ERROR(hipMemcpyDtoD(Buffer.getPtr(hDevice), + Buffer.getPtr(Mem->LastDeviceWritingToMemObj), + Buffer.Size)); } return UR_RESULT_SUCCESS; } @@ -577,22 +576,19 @@ inline ur_result_t migrateImageToDevice(ur_mem_handle_t Mem, CpyDesc3D.srcHost = Image.HostPtr; UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); } - } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { + } else if (Mem->LastDeviceWritingToMemObj != hDevice) { if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { // FIXME: 1D memcpy from DtoD going through the host. UR_CHECK_ERROR(hipMemcpyAtoH( - Image.HostPtr, - Image.getArray(Mem->LastEventWritingToMemObj->getDevice()), + Image.HostPtr, Image.getArray(Mem->LastDeviceWritingToMemObj), 0 /*srcOffset*/, ImageSizeBytes)); UR_CHECK_ERROR( hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { - CpyDesc2D.srcArray = - Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + CpyDesc2D.srcArray = Image.getArray(Mem->LastDeviceWritingToMemObj); UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { - CpyDesc3D.srcArray = - Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + CpyDesc3D.srcArray = Image.getArray(Mem->LastDeviceWritingToMemObj); UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); } } diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 7707794b3c..ff3210582e 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -393,6 +393,10 @@ struct ur_mem_handle_t_ { // We should wait on this event prior to migrating memory across allocations // in this ur_mem_handle_t_ ur_event_handle_t LastEventWritingToMemObj{nullptr}; + // Since the event may not contain device info (if using interop, which + // doesn't take a queue) we should use this member var to keep track of which + // device has most recent view of data + ur_device_handle_t LastDeviceWritingToMemObj{nullptr}; // Enumerates all possible types of accesses. enum access_mode_t { unknown, read_write, read_only, write_only }; @@ -487,18 +491,23 @@ struct ur_mem_handle_t_ { uint32_t getReferenceCount() const noexcept { return RefCount; } - void setLastEventWritingToMemObj(ur_event_handle_t NewEvent) { + void setLastEventWritingToMemObj(ur_event_handle_t NewEvent, + ur_device_handle_t RecentDevice) { assert(NewEvent && "Invalid event!"); // This entry point should only ever be called when using multi device ctx assert(Context->Devices.size() > 1); + urEventRetain(NewEvent); + urDeviceRetain(RecentDevice); if (LastEventWritingToMemObj != nullptr) { urEventRelease(LastEventWritingToMemObj); } - urEventRetain(NewEvent); + if (LastDeviceWritingToMemObj != nullptr) { + urDeviceRelease(LastDeviceWritingToMemObj); + } LastEventWritingToMemObj = NewEvent; for (const auto &Device : Context->getDevices()) { HaveMigratedToDeviceSinceLastWrite[Device->getIndex()] = - Device == NewEvent->getDevice(); + Device == RecentDevice; } } }; From a2eb1e4f8fdd320845f4a6fc27a157c351c46d82 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 13 May 2024 14:10:22 +0100 Subject: [PATCH 2/4] Remove LastDeviceWritingToMemObj LastEventWritingToMemObj is never an interop event, so always has an associated queue. So using an extra LastDeviceWritingToMemObj is not necessary. --- source/adapters/hip/enqueue.cpp | 18 ++++++++++-------- source/adapters/hip/memory.cpp | 23 +++++++++++++++-------- source/adapters/hip/memory.hpp | 15 +++------------ 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 1b79814a5f..f157809295 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -218,8 +218,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hBuffer->LastEventWritingToMemObj && - hBuffer->LastDeviceWritingToMemObj != hQueue->getDevice()) { - Device = hBuffer->LastDeviceWritingToMemObj; + hBuffer->LastEventWritingToMemObj->getQueue()->getDevice() != + hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getQueue()->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -367,8 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // if it has been written to if (phEvent && (MemArg.AccessFlags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY))) { - MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get(), - hQueue->getDevice()); + MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get()); } } // We can release the MemoryMigrationMutexes now @@ -585,8 +585,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hBuffer->LastEventWritingToMemObj && - hBuffer->LastDeviceWritingToMemObj != hQueue->getDevice()) { - Device = hBuffer->LastDeviceWritingToMemObj; + hBuffer->LastEventWritingToMemObj->getQueue()->getDevice() != + hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getQueue()->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -1018,8 +1019,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( // last queue to write to the MemBuffer, meaning we must perform the copy // from a different device if (hImage->LastEventWritingToMemObj && - hImage->LastDeviceWritingToMemObj != hQueue->getDevice()) { - Device = hImage->LastDeviceWritingToMemObj; + hImage->LastEventWritingToMemObj->getQueue()->getDevice() != + hQueue->getDevice()) { + Device = hImage->LastEventWritingToMemObj->getQueue()->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index fc899070a2..ff209884ce 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -525,10 +525,12 @@ inline ur_result_t migrateBufferToDevice(ur_mem_handle_t Mem, UR_CHECK_ERROR( hipMemcpyHtoD(Buffer.getPtr(hDevice), Buffer.HostPtr, Buffer.Size)); } - } else if (Mem->LastDeviceWritingToMemObj != hDevice) { - UR_CHECK_ERROR(hipMemcpyDtoD(Buffer.getPtr(hDevice), - Buffer.getPtr(Mem->LastDeviceWritingToMemObj), - Buffer.Size)); + } else if (Mem->LastEventWritingToMemObj->getQueue()->getDevice() != + hDevice) { + UR_CHECK_ERROR(hipMemcpyDtoD( + Buffer.getPtr(hDevice), + Buffer.getPtr(Mem->LastEventWritingToMemObj->getQueue()->getDevice()), + Buffer.Size)); } return UR_RESULT_SUCCESS; } @@ -576,19 +578,24 @@ inline ur_result_t migrateImageToDevice(ur_mem_handle_t Mem, CpyDesc3D.srcHost = Image.HostPtr; UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); } - } else if (Mem->LastDeviceWritingToMemObj != hDevice) { + } else if (Mem->LastEventWritingToMemObj->getQueue()->getDevice() != + hDevice) { if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { // FIXME: 1D memcpy from DtoD going through the host. UR_CHECK_ERROR(hipMemcpyAtoH( - Image.HostPtr, Image.getArray(Mem->LastDeviceWritingToMemObj), + Image.HostPtr, + Image.getArray( + Mem->LastEventWritingToMemObj->getQueue()->getDevice()), 0 /*srcOffset*/, ImageSizeBytes)); UR_CHECK_ERROR( hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { - CpyDesc2D.srcArray = Image.getArray(Mem->LastDeviceWritingToMemObj); + CpyDesc2D.srcArray = Image.getArray( + Mem->LastEventWritingToMemObj->getQueue()->getDevice()); UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { - CpyDesc3D.srcArray = Image.getArray(Mem->LastDeviceWritingToMemObj); + CpyDesc3D.srcArray = Image.getArray( + Mem->LastEventWritingToMemObj->getQueue()->getDevice()); UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); } } diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index ff3210582e..36545dea02 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -393,10 +393,6 @@ struct ur_mem_handle_t_ { // We should wait on this event prior to migrating memory across allocations // in this ur_mem_handle_t_ ur_event_handle_t LastEventWritingToMemObj{nullptr}; - // Since the event may not contain device info (if using interop, which - // doesn't take a queue) we should use this member var to keep track of which - // device has most recent view of data - ur_device_handle_t LastDeviceWritingToMemObj{nullptr}; // Enumerates all possible types of accesses. enum access_mode_t { unknown, read_write, read_only, write_only }; @@ -491,23 +487,18 @@ struct ur_mem_handle_t_ { uint32_t getReferenceCount() const noexcept { return RefCount; } - void setLastEventWritingToMemObj(ur_event_handle_t NewEvent, - ur_device_handle_t RecentDevice) { + void setLastEventWritingToMemObj(ur_event_handle_t NewEvent) { assert(NewEvent && "Invalid event!"); // This entry point should only ever be called when using multi device ctx assert(Context->Devices.size() > 1); - urEventRetain(NewEvent); - urDeviceRetain(RecentDevice); if (LastEventWritingToMemObj != nullptr) { urEventRelease(LastEventWritingToMemObj); } - if (LastDeviceWritingToMemObj != nullptr) { - urDeviceRelease(LastDeviceWritingToMemObj); - } + urEventRetain(NewEvent); LastEventWritingToMemObj = NewEvent; for (const auto &Device : Context->getDevices()) { HaveMigratedToDeviceSinceLastWrite[Device->getIndex()] = - Device == RecentDevice; + Device == NewEvent->getQueue()->getDevice(); } } }; From 49f71b7c3b9aa8cbf96500d1be491b37674e3d25 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 13 May 2024 14:42:28 +0100 Subject: [PATCH 3/4] Make sure context is set after enqueueEventsWait enqueueEventsWait may set a different context, so we need to reset it after returning. --- source/adapters/hip/enqueue.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index f157809295..76d1baefed 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -229,12 +229,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( &hBuffer->LastEventWritingToMemObj)); } - ScopedContext Active(Device); - // Use the default stream if copying from another device UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); + // enqueueEventsWait may set a different context + ScopedContext Active(Device); + if (phEvent) { RetImplEvent = std::unique_ptr(ur_event_handle_t_::makeNative( @@ -596,11 +597,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( &hBuffer->LastEventWritingToMemObj)); } - ScopedContext Active(Device); - UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); + // enqueueEventsWait may set a different context + ScopedContext Active(Device); + if (phEvent) { RetImplEvent = std::unique_ptr(ur_event_handle_t_::makeNative( From e9702e4c6ff408ba812a847100d81338aa3ad8f7 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 14 May 2024 10:37:31 +0100 Subject: [PATCH 4/4] Reassign hQueue if copying from a different device We should use the queue associated with the most reecent copy of data if reading from buffer. --- source/adapters/hip/enqueue.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 76d1baefed..12449683df 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -220,7 +220,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( if (hBuffer->LastEventWritingToMemObj && hBuffer->LastEventWritingToMemObj->getQueue()->getDevice() != hQueue->getDevice()) { - Device = hBuffer->LastEventWritingToMemObj->getQueue()->getDevice(); + // This event is never created with interop so getQueue is never null + hQueue = hBuffer->LastEventWritingToMemObj->getQueue(); + Device = hQueue->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -229,13 +231,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( &hBuffer->LastEventWritingToMemObj)); } + ScopedContext Active(Device); + // Use the default stream if copying from another device UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); - // enqueueEventsWait may set a different context - ScopedContext Active(Device); - if (phEvent) { RetImplEvent = std::unique_ptr(ur_event_handle_t_::makeNative( @@ -588,7 +589,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( if (hBuffer->LastEventWritingToMemObj && hBuffer->LastEventWritingToMemObj->getQueue()->getDevice() != hQueue->getDevice()) { - Device = hBuffer->LastEventWritingToMemObj->getQueue()->getDevice(); + // This event is never created with interop so getQueue is never null + hQueue = hBuffer->LastEventWritingToMemObj->getQueue(); + Device = hQueue->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last @@ -597,12 +600,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( &hBuffer->LastEventWritingToMemObj)); } + ScopedContext Active(Device); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); - // enqueueEventsWait may set a different context - ScopedContext Active(Device); - if (phEvent) { RetImplEvent = std::unique_ptr(ur_event_handle_t_::makeNative( @@ -1023,7 +1025,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( if (hImage->LastEventWritingToMemObj && hImage->LastEventWritingToMemObj->getQueue()->getDevice() != hQueue->getDevice()) { - Device = hImage->LastEventWritingToMemObj->getQueue()->getDevice(); + hQueue = hImage->LastEventWritingToMemObj->getQueue(); + Device = hQueue->getDevice(); ScopedContext Active(Device); HIPStream = hipStream_t{0}; // Default stream for different device // We may have to wait for an event on another queue if it is the last