Merge pull request #1649 from frasercrmck/hip-multimap
[HIP] Add support for multiple active mappings
kbenzie authored May 23, 2024
2 parents 396fb20 + 07ddcbf commit 3e49b01
Showing 3 changed files with 122 additions and 166 deletions.
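The diff below lets a buffer hold several live host mappings at once; previously the HIP adapter returned UR_RESULT_ERROR_UNSUPPORTED_FEATURE for a second urEnqueueMemBufferMap because it tracked only a single MapPtr per buffer. A hypothetical host-side sketch of what the change enables follows. It is illustrative only, not code from this PR: Queue and Buffer are assumed pre-existing handles, and the parameter order is assumed to follow the urEnqueueMemBufferMap/urEnqueueMemUnmap declarations in the UR headers.

void *Lo = nullptr;
void *Hi = nullptr;
// Map two non-overlapping regions of the same buffer at the same time.
urEnqueueMemBufferMap(Queue, Buffer, /*blockingMap=*/true, UR_MAP_FLAG_READ,
                      /*offset=*/0, /*size=*/256, 0, nullptr, nullptr, &Lo);
urEnqueueMemBufferMap(Queue, Buffer, /*blockingMap=*/true, UR_MAP_FLAG_WRITE,
                      /*offset=*/256, /*size=*/256, 0, nullptr, nullptr, &Hi);
// ... read through Lo and write through Hi on the host ...
urEnqueueMemUnmap(Queue, Buffer, Lo, 0, nullptr, nullptr);
urEnqueueMemUnmap(Queue, Buffer, Hi, 0, nullptr, nullptr);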
109 changes: 50 additions & 59 deletions source/adapters/hip/enqueue.cpp
@@ -15,6 +15,7 @@
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"
#include "ur_api.h"

#include <ur/ur.hpp>

@@ -1239,49 +1240,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
UR_ASSERT(offset + size <= BufferImpl.getSize(),
UR_RESULT_ERROR_INVALID_SIZE);

ur_result_t Result = UR_RESULT_ERROR_INVALID_OPERATION;
const bool IsPinned =
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;

// Currently no support for overlapping regions
if (BufferImpl.getMapPtr() != nullptr) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
auto MapPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
if (!MapPtr) {
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
}

// Allocate a pointer in the host to store the mapped information
auto HostPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
*ppRetMap = std::get<BufferMem>(hBuffer->Mem).getMapPtr();
if (HostPtr) {
Result = UR_RESULT_SUCCESS;
}
const bool IsPinned =
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;

if (!IsPinned &&
((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) {
// Pinned host memory is already on host so it doesn't need to be read.
Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size,
HostPtr, numEventsInWaitList,
phEventWaitList, phEvent);
} else {
ScopedContext Active(hQueue->getDevice());
try {
if (!IsPinned && (mapFlags & (UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE))) {
// Pinned host memory is already on host so it doesn't need to be read.
UR_CHECK_ERROR(urEnqueueMemBufferRead(
hQueue, hBuffer, blockingMap, offset, size, MapPtr,
numEventsInWaitList, phEventWaitList, phEvent));
} else {
ScopedContext Active(hQueue->getDevice());

if (IsPinned) {
Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList,
nullptr);
}
if (IsPinned) {
UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList,
phEventWaitList, nullptr));
}

if (phEvent) {
try {
if (phEvent) {
*phEvent = ur_event_handle_t_::makeNative(
UR_COMMAND_MEM_BUFFER_MAP, hQueue, hQueue->getNextTransferStream());
UR_CHECK_ERROR((*phEvent)->start());
UR_CHECK_ERROR((*phEvent)->record());
} catch (ur_result_t Error) {
Result = Error;
}
}
} catch (ur_result_t Error) {
return Error;
}

return Result;
*ppRetMap = MapPtr;

return UR_RESULT_SUCCESS;
}

/// Implements the unmap from the host, using a BufferWrite operation.
@@ -1292,47 +1286,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
ur_result_t Result = UR_RESULT_SUCCESS;
UR_ASSERT(hMem->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() != nullptr,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() == pMappedPtr,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
auto &BufferImpl = std::get<BufferMem>(hMem->Mem);

const bool IsPinned = std::get<BufferMem>(hMem->Mem).MemAllocMode ==
BufferMem::AllocMode::AllocHostPtr;

if (!IsPinned &&
((std::get<BufferMem>(hMem->Mem).getMapFlags() & UR_MAP_FLAG_WRITE) ||
(std::get<BufferMem>(hMem->Mem).getMapFlags() &
UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) {
// Pinned host memory is only on host so it doesn't need to be written to.
Result = urEnqueueMemBufferWrite(
hQueue, hMem, true, std::get<BufferMem>(hMem->Mem).getMapOffset(),
std::get<BufferMem>(hMem->Mem).getMapSize(), pMappedPtr,
numEventsInWaitList, phEventWaitList, phEvent);
} else {
ScopedContext Active(hQueue->getDevice());
auto *Map = BufferImpl.getMapDetails(pMappedPtr);
UR_ASSERT(Map != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT);

if (IsPinned) {
Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList,
nullptr);
}
const bool IsPinned =
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;

if (phEvent) {
try {
try {
if (!IsPinned &&
(Map->getMapFlags() &
(UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) {
// Pinned host memory is only on host so it doesn't need to be written to.
UR_CHECK_ERROR(urEnqueueMemBufferWrite(
hQueue, hMem, true, Map->getMapOffset(), Map->getMapSize(),
pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent));
} else {
ScopedContext Active(hQueue->getDevice());

if (IsPinned) {
UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList,
phEventWaitList, nullptr));
}

if (phEvent) {
*phEvent = ur_event_handle_t_::makeNative(
UR_COMMAND_MEM_UNMAP, hQueue, hQueue->getNextTransferStream());
UR_CHECK_ERROR((*phEvent)->start());
UR_CHECK_ERROR((*phEvent)->record());
} catch (ur_result_t Error) {
Result = Error;
}
}
} catch (ur_result_t Error) {
return Error;
}

std::get<BufferMem>(hMem->Mem).unmap(pMappedPtr);
return Result;
BufferImpl.unmap(pMappedPtr);
return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
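A hedged sketch of the write-back decision the new urEnqueueMemUnmap above encodes: only mappings created with a write flag are copied back to the device, pinned (AllocHostPtr) buffers skip the copy entirely, and each mapping writes back only its own sub-region via the per-mapping offset and size. The standalone helper below is an assumption for illustration, not part of the adapter.

#include "ur_api.h"

// Returns true when an unmapped region has to be written back to the device.
static bool needsWriteBack(ur_map_flags_t MapFlags, bool IsPinned) {
  // Pinned host memory is already visible to the device; nothing to copy.
  if (IsPinned)
    return false;
  // Only WRITE / WRITE_INVALIDATE_REGION mappings carry host-side changes.
  return (MapFlags &
          (UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) != 0;
}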
125 changes: 69 additions & 56 deletions source/adapters/hip/memory.hpp
@@ -9,53 +9,72 @@
//===----------------------------------------------------------------------===//
#pragma once

#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include <cassert>
#include <memory>
#include <unordered_map>
#include <variant>

#include "common.hpp"

ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t,
const ur_device_handle_t);
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t,
const ur_device_handle_t);

// Handler for plain, pointer-based HIP allocations
struct BufferMem {
struct BufferMap {
/// Size of the active mapped region.
size_t MapSize;
/// Offset of the active mapped region.
size_t MapOffset;
/// Original flags for the mapped region
ur_map_flags_t MapFlags;
/// Allocated host memory used exclusively for this map.
std::shared_ptr<unsigned char[]> MapMem;

BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags)
: MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
MapMem(nullptr) {}

BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags,
std::unique_ptr<unsigned char[]> &&MapMem)
: MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
MapMem(std::move(MapMem)) {}

size_t getMapSize() const noexcept { return MapSize; }

size_t getMapOffset() const noexcept { return MapOffset; }

ur_map_flags_t getMapFlags() const noexcept { return MapFlags; }
};

/** AllocMode
* Classic: Just a normal buffer allocated on the device via hip malloc
* UseHostPtr: Use an address on the host for the device
* CopyIn: The data for the device comes from the host but the host
pointer is not available later for re-use
* AllocHostPtr: Uses pinned-memory allocation
*/
enum class AllocMode { Classic, UseHostPtr, CopyIn, AllocHostPtr };

using native_type = hipDeviceptr_t;

// If this allocation is a sub-buffer (i.e., a view on an existing
// allocation), this is the pointer to the parent handler structure
ur_mem_handle_t Parent = nullptr;
// Outer mem holding this struct in variant
ur_mem_handle_t OuterMemStruct;

/// Pointer associated with this device on the host
void *HostPtr;
/// Size of the allocation in bytes
size_t Size;
/// Size of the active mapped region.
size_t MapSize;
/// Offset of the active mapped region.
size_t MapOffset;
/// Pointer to the active mapped region, if any
void *MapPtr;
/// Original flags for the mapped region
ur_map_flags_t MapFlags;
/// A map that contains all the active mappings for this buffer.
std::unordered_map<void *, BufferMap> PtrToBufferMap;

/** AllocMode
* Classic: Just a normal buffer allocated on the device via hip malloc
* UseHostPtr: Use an address on the host for the device
* CopyIn: The data for the device comes from the host but the host
pointer is not available later for re-use
* AllocHostPtr: Uses pinned-memory allocation
*/
enum class AllocMode {
Classic,
UseHostPtr,
CopyIn,
AllocHostPtr
} MemAllocMode;
AllocMode MemAllocMode;

private:
// Vector of HIP pointers
@@ -65,10 +84,8 @@ struct BufferMem {
BufferMem(ur_context_handle_t Context, ur_mem_handle_t OuterMemStruct,
AllocMode Mode, void *HostPtr, size_t Size)
: OuterMemStruct{OuterMemStruct}, HostPtr{HostPtr}, Size{Size},
MapSize{0}, MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE},
MemAllocMode{Mode}, Ptrs(Context->Devices.size(), native_type{0}){};

BufferMem(const BufferMem &Buffer) = default;
PtrToBufferMap{}, MemAllocMode{Mode},
Ptrs(Context->Devices.size(), native_type{0}){};

// This will allocate memory on device if there isn't already an active
// allocation on the device
@@ -98,45 +115,41 @@ struct BufferMem {

size_t getSize() const noexcept { return Size; }

void *getMapPtr() const noexcept { return MapPtr; }

size_t getMapSize() const noexcept { return MapSize; }

size_t getMapOffset() const noexcept { return MapOffset; }
BufferMap *getMapDetails(void *Map) {
auto details = PtrToBufferMap.find(Map);
if (details != PtrToBufferMap.end()) {
return &details->second;
}
return nullptr;
}

/// Returns a pointer to data visible on the host that contains
/// the data on the device associated with this allocation.
/// The offset is used to index into the HIP allocation.
///
void *mapToPtr(size_t Size, size_t Offset, ur_map_flags_t Flags) noexcept {
assert(MapPtr == nullptr);
MapSize = Size;
MapOffset = Offset;
MapFlags = Flags;
if (HostPtr) {
MapPtr = static_cast<char *>(HostPtr) + Offset;
void *mapToPtr(size_t MapSize, size_t MapOffset,
ur_map_flags_t MapFlags) noexcept {
void *MapPtr = nullptr;
if (HostPtr == nullptr) {
/// If HostPtr is invalid, we need to create a Mapping that owns its own
/// memory on the host.
auto MapMem = std::make_unique<unsigned char[]>(MapSize);
MapPtr = MapMem.get();
PtrToBufferMap.insert(
{MapPtr, BufferMap(MapSize, MapOffset, MapFlags, std::move(MapMem))});
} else {
// TODO: Allocate only what is needed based on the offset
MapPtr = static_cast<void *>(malloc(this->getSize()));
/// However, if HostPtr already has valid memory (e.g. pinned allocation),
/// we can just use that memory for the mapping.
MapPtr = static_cast<char *>(HostPtr) + MapOffset;
PtrToBufferMap.insert({MapPtr, BufferMap(MapSize, MapOffset, MapFlags)});
}
return MapPtr;
}

/// Detach the allocation from the host memory.
void unmap(void *) noexcept {
void unmap(void *MapPtr) noexcept {
assert(MapPtr != nullptr);

if (MapPtr != HostPtr) {
free(MapPtr);
}
MapPtr = nullptr;
MapSize = 0;
MapOffset = 0;
}

ur_map_flags_t getMapFlags() const noexcept {
assert(MapPtr != nullptr);
return MapFlags;
PtrToBufferMap.erase(MapPtr);
}

ur_result_t clear() {
@@ -414,7 +427,7 @@ struct ur_mem_handle_t_ {
HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false),
Mem{std::in_place_type<BufferMem>, Ctxt, this, Mode, HostPtr, Size} {
urContextRetain(Context);
};
}

// Subbuffer constructor
ur_mem_handle_t_(ur_mem Parent, size_t SubBufferOffset)
@@ -435,7 +448,7 @@ struct ur_mem_handle_t_ {
}
}
urMemRetain(Parent);
};
}

/// Constructs the UR mem handler for an Image object
ur_mem_handle_t_(ur_context Ctxt, ur_mem_flags_t MemFlags,
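For context, a minimal standalone sketch (assumed names, not the adapter's actual class) of the pointer-keyed bookkeeping memory.hpp now uses: a pinned buffer hands out views into HostPtr at the requested offset, any other buffer gives each mapping its own host staging block, and the returned pointer indexes the table of active mappings so several can coexist.

#include <cstddef>
#include <memory>
#include <unordered_map>

struct MapRecord {
  size_t Size = 0;
  size_t Offset = 0;
  std::shared_ptr<unsigned char[]> OwnedMem; // null when backed by HostPtr
};

struct MultiMapBuffer {
  unsigned char *HostPtr = nullptr; // non-null for pinned/host-backed buffers
  std::unordered_map<void *, MapRecord> Active;

  void *map(size_t Size, size_t Offset) {
    MapRecord Rec{Size, Offset, nullptr};
    void *Ptr = nullptr;
    if (HostPtr) {
      // Pinned memory: the mapping is just a view at the requested offset.
      Ptr = HostPtr + Offset;
    } else {
      // Otherwise each mapping owns its own host-side staging block.
      Rec.OwnedMem.reset(new unsigned char[Size]);
      Ptr = Rec.OwnedMem.get();
    }
    Active[Ptr] = Rec; // multiple entries may be live at once
    return Ptr;
  }

  void unmap(void *Ptr) { Active.erase(Ptr); } // releases OwnedMem, if owned
};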