-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #939 from steffenlarsen/steffen/virtual_mem_adapters
[UR][CUDA][L0][HIP] Add virtual memory adapter implementations
- Loading branch information
Showing
39 changed files
with
859 additions
and
279 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
//===--------- physical_mem.cpp - CUDA Adapter ----------------------------===// | ||
// | ||
// Copyright (C) 2023 Intel Corporation | ||
// | ||
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See LICENSE.TXT | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "physical_mem.hpp" | ||
#include "common.hpp" | ||
#include "context.hpp" | ||
#include "event.hpp" | ||
|
||
#include <cassert> | ||
#include <cuda.h> | ||
|
||
/// Creates a pinned physical memory allocation on \p hDevice and wraps it in a
/// new physical memory handle.
///
/// \param hContext Context passed to the new handle object, which retains it.
/// \param hDevice Device whose ordinal is used as the allocation location.
/// \param size Size in bytes forwarded to cuMemCreate.
/// \param pProperties Currently unused.
/// \param phPhysicalMem Receives the newly created handle on success.
/// \return UR_RESULT_SUCCESS on success, UR_RESULT_ERROR_INVALID_SIZE when
///         cuMemCreate reports CUDA_ERROR_INVALID_VALUE, otherwise the error
///         raised by the failing call.
UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
    [[maybe_unused]] const ur_physical_mem_properties_t *pProperties,
    ur_physical_mem_handle_t *phPhysicalMem) {
  // Errors reported through UR_CHECK_ERROR are translated into return codes
  // here, matching urPhysicalMemRelease, so nothing propagates out of this
  // entry point as an exception.
  try {
    CUmemAllocationProp AllocProps = {};
    AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id));

    CUmemGenericAllocationHandle ResHandle;
    switch (auto Result = cuMemCreate(&ResHandle, size, &AllocProps, 0)) {
    case CUDA_ERROR_INVALID_VALUE:
      return UR_RESULT_ERROR_INVALID_SIZE;
    default:
      UR_CHECK_ERROR(Result);
    }

    try {
      *phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext);
    } catch (...) {
      // Best-effort cleanup: do not leak the driver allocation when the
      // wrapper object could not be created. The original error is reported.
      (void)cuMemRelease(ResHandle);
      throw;
    }

    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Adds one reference to the given physical memory handle.
///
/// \param hPhysicalMem Handle whose reference count is incremented.
/// \return Always UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
  // The updated count is not needed by this entry point.
  (void)hPhysicalMem->incrementReferenceCount();
  return UR_RESULT_SUCCESS;
}
|
||
/// Drops one reference from the given physical memory handle and, when no
/// references remain, releases the native allocation and deletes the handle.
///
/// \param hPhysicalMem Handle whose reference count is decremented.
/// \return UR_RESULT_SUCCESS on success, the ur_result_t thrown while
///         releasing, or UR_RESULT_ERROR_OUT_OF_RESOURCES for any other
///         exception.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
  const uint32_t RemainingRefs = hPhysicalMem->decrementReferenceCount();
  if (RemainingRefs != 0)
    return UR_RESULT_SUCCESS;

  try {
    // Declared before the scoped context so the handle object is destroyed
    // after the scoped context when leaving this scope.
    std::unique_ptr<ur_physical_mem_handle_t_> Owner{hPhysicalMem};

    ScopedContext Active(Owner->getContext());
    UR_CHECK_ERROR(cuMemRelease(Owner->get()));
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
  return UR_RESULT_SUCCESS;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
//===---------- physical_mem.hpp - CUDA Adapter ---------------------------===// | ||
// | ||
// Copyright (C) 2023 Intel Corporation | ||
// | ||
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See LICENSE.TXT | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
#pragma once | ||
|
||
#include <ur/ur.hpp> | ||
|
||
#include <cuda.h> | ||
|
||
#include "adapter.hpp" | ||
#include "device.hpp" | ||
#include "platform.hpp" | ||
|
||
/// UR queue mapping on physical memory allocations used in virtual memory | ||
/// management. | ||
/// | ||
struct ur_physical_mem_handle_t_ { | ||
using native_type = CUmemGenericAllocationHandle; | ||
|
||
std::atomic_uint32_t RefCount; | ||
native_type PhysicalMem; | ||
ur_context_handle_t_ *Context; | ||
|
||
ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx) | ||
: RefCount(1), PhysicalMem(PhysMem), Context(Ctx) { | ||
urContextRetain(Context); | ||
} | ||
|
||
~ur_physical_mem_handle_t_() { urContextRelease(Context); } | ||
|
||
native_type get() const noexcept { return PhysicalMem; } | ||
|
||
ur_context_handle_t_ *getContext() const noexcept { return Context; } | ||
|
||
uint32_t incrementReferenceCount() noexcept { return ++RefCount; } | ||
|
||
uint32_t decrementReferenceCount() noexcept { return --RefCount; } | ||
|
||
uint32_t getReferenceCount() const noexcept { return RefCount; } | ||
}; | ||
|
||
/// Looks up the ordinal of \p Device by scanning the adapter's platforms.
///
/// The ordinal is the index of the platform whose first device has the same
/// native handle as \p Device.
///
/// \param Device Device to look up.
/// \param Ordinal Used as the loop counter; holds the matching platform index
///        when UR_RESULT_SUCCESS is returned.
/// \return UR_RESULT_SUCCESS when a match is found,
///         UR_RESULT_ERROR_INVALID_DEVICE when no platform matches.
inline ur_result_t GetDeviceOrdinal(ur_device_handle_t Device, int &Ordinal) {
  ur_adapter_handle_t AdapterHandle = &adapter;

  // Query the platform count first, then fetch that many platform handles.
  uint32_t PlatformCount;
  UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &PlatformCount));
  UR_ASSERT(PlatformCount, UR_RESULT_ERROR_UNKNOWN);

  std::vector<ur_platform_handle_t> PlatformList(PlatformCount);
  UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, PlatformCount,
                               PlatformList.data(), nullptr));

  // Ordinal corresponds to the platform ID as each device has its own platform.
  const CUdevice WantedDevice = Device->get();
  Ordinal = 0;
  while (size_t(Ordinal) < PlatformList.size()) {
    if (PlatformList[Ordinal]->Devices[0]->get() == WantedDevice)
      return UR_RESULT_SUCCESS;
    ++Ordinal;
  }
  return UR_RESULT_ERROR_INVALID_DEVICE;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
//===--------- virtual_mem.cpp - CUDA Adapter -----------------------------===// | ||
// | ||
// Copyright (C) 2023 Intel Corporation | ||
// | ||
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See LICENSE.TXT | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "common.hpp" | ||
#include "context.hpp" | ||
#include "event.hpp" | ||
#include "physical_mem.hpp" | ||
|
||
#include <cassert> | ||
#include <cuda.h> | ||
|
||
/// Queries the minimum or recommended allocation granularity for pinned
/// device allocations on \p hDevice.
///
/// \param hContext Context made active for the duration of the query.
/// \param hDevice Device whose ordinal is used as the allocation location.
/// \param propName Which granularity to query.
/// \param propSize Forwarded to UrReturnHelper.
/// \param pPropValue Forwarded to UrReturnHelper.
/// \param pPropSizeRet Forwarded to UrReturnHelper.
/// \return The result of returning the granularity through UrReturnHelper,
///         UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for any other \p propName,
///         or the error raised by the failing call.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
    ur_context_handle_t hContext, ur_device_handle_t hDevice,
    ur_virtual_mem_granularity_info_t propName, size_t propSize,
    void *pPropValue, size_t *pPropSizeRet) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

    ScopedContext Active(hContext);
    switch (propName) {
    case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM:
    case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: {
      CUmemAllocationGranularity_flags Flags =
          propName == UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM
              ? CU_MEM_ALLOC_GRANULARITY_MINIMUM
              : CU_MEM_ALLOC_GRANULARITY_RECOMMENDED;
      CUmemAllocationProp AllocProps = {};
      AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
      AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
      UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id));

      size_t Granularity;
      UR_CHECK_ERROR(
          cuMemGetAllocationGranularity(&Granularity, &AllocProps, Flags));
      return ReturnValue(Granularity);
    }
    default:
      return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
    }
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Reserves a range of virtual addresses.
///
/// \param hContext Context made active for the duration of the call.
/// \param pStart Requested start address, forwarded to cuMemAddressReserve.
/// \param size Size in bytes of the range to reserve.
/// \param ppStart Receives the start of the reserved range on success; left
///        untouched on failure.
/// \return UR_RESULT_SUCCESS on success, otherwise the error raised by the
///         failing call.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart,
                    size_t size, void **ppStart) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    ScopedContext Active(hContext);

    // Let the driver write into a real CUdeviceptr rather than reinterpreting
    // the caller's void* storage as one.
    CUdeviceptr Reserved = 0;
    UR_CHECK_ERROR(cuMemAddressReserve(
        &Reserved, size, 0, reinterpret_cast<CUdeviceptr>(pStart), 0));
    *ppStart = reinterpret_cast<void *>(Reserved);
    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Frees a previously reserved range of virtual addresses.
///
/// \param hContext Context made active for the duration of the call.
/// \param pStart Start of the range, forwarded to cuMemAddressFree.
/// \param size Size in bytes of the range.
/// \return UR_RESULT_SUCCESS on success, otherwise the error raised by the
///         failing call.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(
    ur_context_handle_t hContext, const void *pStart, size_t size) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    ScopedContext Active(hContext);
    UR_CHECK_ERROR(
        cuMemAddressFree(reinterpret_cast<CUdeviceptr>(pStart), size));
    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Sets the access protection of a virtual address range for the context's
/// device.
///
/// The read-write flag takes precedence over the read-only flag; when neither
/// is set the range is given no access.
///
/// \param hContext Context whose device is used as the access location and
///        which is made active for the driver call.
/// \param pStart Start of the range, forwarded to cuMemSetAccess.
/// \param size Size in bytes of the range.
/// \param flags UR access flags to translate.
/// \return UR_RESULT_SUCCESS on success, otherwise the error raised by the
///         failing call.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart,
                      size_t size, ur_virtual_mem_access_flags_t flags) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    CUmemAccessDesc AccessDesc = {};
    if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE)
      AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY)
      AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READ;
    else
      AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
    AccessDesc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    // TODO: When contexts support multiple devices, we should create a
    //       descriptor for each. We may also introduce a variant of this
    //       function with a specific device.
    UR_CHECK_ERROR(
        GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id));

    ScopedContext Active(hContext);
    UR_CHECK_ERROR(cuMemSetAccess(reinterpret_cast<CUdeviceptr>(pStart), size,
                                  &AccessDesc, 1));
    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Maps a physical memory allocation into a virtual address range and, when
/// \p flags is non-zero, applies the requested access protection.
///
/// \param hContext Context made active for the duration of the call.
/// \param pStart Start of the virtual range, forwarded to cuMemMap.
/// \param size Size in bytes of the range.
/// \param hPhysicalMem Physical memory handle providing the native allocation.
/// \param offset Offset forwarded to cuMemMap.
/// \param flags Access flags; zero skips the urVirtualMemSetAccess call.
/// \return UR_RESULT_SUCCESS on success, otherwise the error raised by the
///         failing call.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size,
                ur_physical_mem_handle_t hPhysicalMem, size_t offset,
                ur_virtual_mem_access_flags_t flags) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    ScopedContext Active(hContext);
    UR_CHECK_ERROR(cuMemMap(reinterpret_cast<CUdeviceptr>(pStart), size, offset,
                            hPhysicalMem->get(), 0));
    if (flags)
      UR_CHECK_ERROR(urVirtualMemSetAccess(hContext, pStart, size, flags));
    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Unmaps a virtual address range.
///
/// \param hContext Context made active for the duration of the call.
/// \param pStart Start of the range, forwarded to cuMemUnmap.
/// \param size Size in bytes of the range.
/// \return UR_RESULT_SUCCESS on success, otherwise the error raised by the
///         failing call.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(
    ur_context_handle_t hContext, const void *pStart, size_t size) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    ScopedContext Active(hContext);
    UR_CHECK_ERROR(cuMemUnmap(reinterpret_cast<CUdeviceptr>(pStart), size));
    return UR_RESULT_SUCCESS;
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
|
||
/// Queries information about a virtual address; only the access mode is
/// supported.
///
/// \param hContext Context whose device is used as the queried location and
///        which is made active for the driver call.
/// \param pStart Address forwarded to cuMemGetAccess.
/// \param size Unused.
/// \param propName Property to query.
/// \param propSize Forwarded to UrReturnHelper.
/// \param pPropValue Forwarded to UrReturnHelper.
/// \param pPropSizeRet Forwarded to UrReturnHelper.
/// \return The result of returning the access flags through UrReturnHelper,
///         UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for any other \p propName,
///         or the error raised by the failing call.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(
    ur_context_handle_t hContext, const void *pStart,
    [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName,
    size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
  // Errors reported through UR_CHECK_ERROR are converted into return codes so
  // they do not propagate out of this entry point as exceptions.
  try {
    UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

    ScopedContext Active(hContext);
    switch (propName) {
    case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: {
      CUmemLocation MemLocation = {};
      MemLocation.type = CU_MEM_LOCATION_TYPE_DEVICE;
      UR_CHECK_ERROR(GetDeviceOrdinal(hContext->getDevice(), MemLocation.id));

      unsigned long long CuAccessFlags;
      UR_CHECK_ERROR(cuMemGetAccess(&CuAccessFlags, &MemLocation,
                                    reinterpret_cast<CUdeviceptr>(pStart)));

      // Any value other than read-write or read-only is reported as zero.
      ur_virtual_mem_access_flags_t UrAccessFlags = 0;
      if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READWRITE)
        UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE;
      else if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READ)
        UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY;
      return ReturnValue(UrAccessFlags);
    }
    default:
      return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
    }
  } catch (ur_result_t Err) {
    return Err;
  } catch (...) {
    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.