Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[UR][CUDA][L0][HIP] Add virtual memory adapter implementations #939

Merged
merged 32 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5eddd17
[UR][CUDA][L0][HIP] Add virtual memory adapter implementations
steffenlarsen Oct 9, 2023
eed79a7
Remove duplicate CUDA UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT case
steffenlarsen Oct 9, 2023
c5ea815
Fix return of check error
steffenlarsen Oct 9, 2023
986330c
Fix common include in cuda device.hpp
steffenlarsen Oct 10, 2023
44ca139
Zero-initialize CUmemAccessDesc
steffenlarsen Oct 11, 2023
0694ea8
Add missing newline
steffenlarsen Oct 11, 2023
9d7ac52
Fix wrongful use of UR_ASSERT in cuda implementation
steffenlarsen Oct 13, 2023
bae2836
an -> a
steffenlarsen Oct 13, 2023
f9507c0
Merge branch 'adapters' into steffen/virtual_mem_adapters
steffenlarsen Oct 20, 2023
71d1014
Merge branch 'adapters' into steffen/virtual_mem_adapters
steffenlarsen Nov 1, 2023
3c0a444
Merge remote-tracking branch 'intel/adapters' into steffen/virtual_me…
steffenlarsen Nov 21, 2023
cf943a1
Add interfaces to loaders
steffenlarsen Nov 21, 2023
9a50ceb
Add OpenCL interfaces
steffenlarsen Nov 21, 2023
5f680fb
Add NATIVE CPU interfaces
steffenlarsen Nov 21, 2023
dda9c04
Add missed files
steffenlarsen Nov 21, 2023
cd7a552
Remove repeat cases and invalid include
steffenlarsen Nov 21, 2023
d4649a1
Remove another invalid case and fix namespace
steffenlarsen Nov 21, 2023
75c4dd4
Fix wrong die prefix
steffenlarsen Nov 21, 2023
a0b045d
Avoid death
steffenlarsen Nov 22, 2023
31aba08
Fix return value of virtual mem info call
steffenlarsen Nov 23, 2023
b819b42
Merge branch 'adapters' into steffen/virtual_mem_adapters
steffenlarsen Nov 28, 2023
ca9b67a
Remove passing match lines
steffenlarsen Nov 29, 2023
a9204fd
Merge remote-tracking branch 'intel/adapters' into steffen/virtual_me…
steffenlarsen Nov 30, 2023
5480195
Address feedback
steffenlarsen Nov 30, 2023
2940ec8
Remove virtual mem CUDA matches
steffenlarsen Nov 30, 2023
913bcc6
Merge branch 'adapters' into steffen/virtual_mem_adapters
steffenlarsen Dec 1, 2023
c3a98ea
Adjust L0 match file
steffenlarsen Dec 1, 2023
31da61b
Remove L0 Virtual Mem device query from expected failures
steffenlarsen Dec 1, 2023
63d3957
Remove HIP expected failures
steffenlarsen Dec 1, 2023
cdd252f
Merge branch 'adapters' into steffen/virtual_mem_adapters
steffenlarsen Dec 4, 2023
0563259
Merge remote-tracking branch 'intel/main' into steffen/virtual_mem_ad…
steffenlarsen Dec 15, 2023
1678894
Merge branch 'main' into steffen/virtual_mem_adapters
aarongreig Dec 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions source/adapters/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
Expand All @@ -40,6 +42,7 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
)
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
// TODO: Investigate if this information is available on CUDA.
case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED:
return ReturnValue(false);
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
return ReturnValue(true);
case UR_DEVICE_INFO_ESIMD_SUPPORT:
return ReturnValue(false);
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
Expand All @@ -1028,7 +1030,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

default:
Expand Down
58 changes: 58 additions & 0 deletions source/adapters/cuda/physical_mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//===--------- physical_mem.cpp - CUDA Adapter ----------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "physical_mem.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"

#include <cassert>
#include <cuda.h>

/// Creates a physical memory allocation of `size` bytes on `hDevice` and
/// returns a new handle for it through `phPhysicalMem`.
///
/// The allocation is requested as pinned device memory located on the CUDA
/// device ordinal that corresponds to `hDevice`. The returned handle is
/// constructed with `hContext`. `pProperties` is currently unused.
///
/// \return UR_RESULT_SUCCESS on success, or UR_RESULT_ERROR_INVALID_DEVICE if
///         no ordinal could be determined for `hDevice`.
UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
    [[maybe_unused]] const ur_physical_mem_properties_t *pProperties,
    ur_physical_mem_handle_t *phPhysicalMem) {
  CUmemAllocationProp AllocProps = {};
  AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
  // GetDeviceOrdinal reports success as UR_RESULT_SUCCESS, so the result must
  // be compared against it explicitly rather than used as a boolean condition.
  if (GetDeviceOrdinal(hDevice, AllocProps.location.id) != UR_RESULT_SUCCESS)
    return UR_RESULT_ERROR_INVALID_DEVICE;

  CUmemGenericAllocationHandle ResHandle;
  UR_CHECK_ERROR(cuMemCreate(&ResHandle, size, &AllocProps, 0));
  *phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext);

  return UR_RESULT_SUCCESS;
}

/// Adds one reference to the given physical memory handle.
///
/// \return Always UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
  // The updated count is not needed here; only the increment matters.
  static_cast<void>(hPhysicalMem->incrementReferenceCount());
  return UR_RESULT_SUCCESS;
}

/// Drops one reference from the given physical memory handle. When the count
/// reaches zero, the native allocation is released with cuMemRelease and the
/// handle object is deleted.
///
/// \return UR_RESULT_SUCCESS on success, the thrown ur_result_t if one is
///         raised while releasing, or UR_RESULT_ERROR_OUT_OF_RESOURCES for
///         any other exception.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
  const bool IsLastReference = hPhysicalMem->decrementReferenceCount() == 0;
  if (!IsLastReference)
    return UR_RESULT_SUCCESS;

  ur_result_t Result = UR_RESULT_SUCCESS;
  try {
    // Declared first so it is destroyed last: the handle object is deleted
    // only after the scoped context below has gone out of scope.
    std::unique_ptr<ur_physical_mem_handle_t_> Owner{hPhysicalMem};

    ScopedContext Active(Owner->getContext());
    UR_CHECK_ERROR(cuMemRelease(Owner->get()));
  } catch (ur_result_t Err) {
    Result = Err;
  } catch (...) {
    Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
  }
  return Result;
}
68 changes: 68 additions & 0 deletions source/adapters/cuda/physical_mem.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//===---------- physical_mem.hpp - CUDA Adapter ---------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <ur/ur.hpp>

#include <cuda.h>

#include "adapter.hpp"
#include "device.hpp"
#include "platform.hpp"

/// UR queue mapping on physical memory allocations used in virtual memory
/// management.
///
struct ur_physical_mem_handle_t_ {
using native_type = CUmemGenericAllocationHandle;

std::atomic_uint32_t RefCount;
native_type PhysicalMem;
ur_context_handle_t_ *Context;

ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx)
: RefCount(1), PhysicalMem(PhysMem), Context(Ctx) {
urContextRetain(Context);
}

~ur_physical_mem_handle_t_() { urContextRelease(Context); }

native_type get() const noexcept { return PhysicalMem; }

ur_context_handle_t_ *getContext() const noexcept { return Context; }

uint32_t incrementReferenceCount() noexcept { return ++RefCount; }

uint32_t decrementReferenceCount() noexcept { return --RefCount; }

uint32_t getReferenceCount() const noexcept { return RefCount; }
};

/// Finds the ordinal of a device.
///
/// Queries the list of platforms from the adapter and searches for the
/// platform whose first device has the same native handle as \p Device.
///
/// \param Device  Device to look up.
/// \param Ordinal Receives the index of the matching platform on success. It
///                is used as the loop counter, so its value is modified even
///                when no match is found.
/// \return UR_RESULT_SUCCESS if a match was found,
///         UR_RESULT_ERROR_INVALID_ARGUMENT if a platform query fails,
///         UR_RESULT_ERROR_UNKNOWN if no platforms are reported, and
///         UR_RESULT_ERROR_INVALID_DEVICE if no platform matches.
inline ur_result_t GetDeviceOrdinal(ur_device_handle_t Device, int &Ordinal) {
  ur_adapter_handle_t AdapterHandle = &adapter;
  // Get list of platforms. urPlatformGet returns a ur_result_t, so compare it
  // against UR_RESULT_SUCCESS instead of treating it as a boolean condition.
  uint32_t NumPlatforms = 0;
  if (urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms) !=
      UR_RESULT_SUCCESS)
    return UR_RESULT_ERROR_INVALID_ARGUMENT;
  if (NumPlatforms == 0)
    return UR_RESULT_ERROR_UNKNOWN;

  // Parentheses make it explicit that this is a size, not an element list.
  std::vector<ur_platform_handle_t> Platforms(NumPlatforms);
  if (urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(),
                    nullptr) != UR_RESULT_SUCCESS)
    return UR_RESULT_ERROR_INVALID_ARGUMENT;

  // Ordinal corresponds to the platform ID as each device has its own platform.
  CUdevice NativeDevice = Device->get();
  for (Ordinal = 0; size_t(Ordinal) < Platforms.size(); ++Ordinal)
    if (Platforms[Ordinal]->Devices[0]->get() == NativeDevice)
      return UR_RESULT_SUCCESS;
  return UR_RESULT_ERROR_INVALID_DEVICE;
}
138 changes: 138 additions & 0 deletions source/adapters/cuda/virtual_mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
//===--------- virtual_mem.cpp - CUDA Adapter -----------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "physical_mem.hpp"

#include <cassert>
#include <cuda.h>

/// Queries the allocation granularity for pinned device memory on `hDevice`.
///
/// Supports UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM and
/// UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, which are forwarded to
/// cuMemGetAllocationGranularity with the corresponding CUDA flag. The value
/// is written through the return helper built from `propSize`, `pPropValue`
/// and `pPropSizeRet`.
///
/// \return UR_RESULT_ERROR_INVALID_DEVICE if no ordinal could be determined
///         for `hDevice`, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for any
///         other `propName`, otherwise the result of the return helper.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
    ur_context_handle_t hContext, ur_device_handle_t hDevice,
    ur_virtual_mem_granularity_info_t propName, size_t propSize,
    void *pPropValue, size_t *pPropSizeRet) {
  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

  ScopedContext Active(hContext);
  switch (propName) {
  case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM:
  case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: {
    CUmemAllocationGranularity_flags Flags =
        propName == UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM
            ? CU_MEM_ALLOC_GRANULARITY_MINIMUM
            : CU_MEM_ALLOC_GRANULARITY_RECOMMENDED;
    CUmemAllocationProp AllocProps = {};
    AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    // GetDeviceOrdinal reports success as UR_RESULT_SUCCESS, so compare
    // against it explicitly rather than using the result as a condition.
    if (GetDeviceOrdinal(hDevice, AllocProps.location.id) != UR_RESULT_SUCCESS)
      return UR_RESULT_ERROR_INVALID_DEVICE;

    size_t Granularity;
    UR_CHECK_ERROR(
        cuMemGetAllocationGranularity(&Granularity, &AllocProps, Flags));
    return ReturnValue(Granularity);
  }
  default:
    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
  }

  return UR_RESULT_SUCCESS;
}

/// Reserves a virtual address range of `size` bytes via cuMemAddressReserve.
///
/// `pStart` is passed as the requested start address and the reserved start
/// address is written to `*ppStart`. Alignment and flags are passed as 0.
///
/// \return UR_RESULT_SUCCESS when the reservation call is accepted.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart,
                    size_t size, void **ppStart) {
  ScopedContext Active(hContext);

  CUdeviceptr *const ReservedOut = reinterpret_cast<CUdeviceptr *>(ppStart);
  const CUdeviceptr RequestedStart = reinterpret_cast<CUdeviceptr>(pStart);
  UR_CHECK_ERROR(
      cuMemAddressReserve(ReservedOut, size, 0, RequestedStart, 0));

  return UR_RESULT_SUCCESS;
}

/// Frees the virtual address range starting at `pStart` spanning `size` bytes
/// via cuMemAddressFree.
///
/// \return UR_RESULT_SUCCESS when the free call is accepted.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(
    ur_context_handle_t hContext, const void *pStart, size_t size) {
  ScopedContext Active(hContext);

  const CUdeviceptr RangeStart = reinterpret_cast<CUdeviceptr>(pStart);
  UR_CHECK_ERROR(cuMemAddressFree(RangeStart, size));

  return UR_RESULT_SUCCESS;
}

/// Sets the access protection of the virtual address range
/// [`pStart`, `pStart` + `size`) for the device of `hContext`.
///
/// `flags` is translated to a CUDA protection value: read-write takes
/// precedence over read-only, and anything else maps to no access.
///
/// \return UR_RESULT_SUCCESS on success, or UR_RESULT_ERROR_INVALID_DEVICE if
///         no ordinal could be determined for the context's device.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart,
                      size_t size, ur_virtual_mem_access_flags_t flags) {
  // Zero-initialize so that no field of the descriptor is left indeterminate.
  CUmemAccessDesc AccessDesc = {};
  if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE)
    AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
  else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY)
    AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READ;
  else
    AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
  AccessDesc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  // TODO: When contexts support multiple devices, we should create a descriptor
  //       for each. We may also introduce a variant of this function with a
  //       specific device.
  // GetDeviceOrdinal reports success as UR_RESULT_SUCCESS, so compare against
  // it explicitly rather than using the result as a boolean condition.
  if (GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id) !=
      UR_RESULT_SUCCESS)
    return UR_RESULT_ERROR_INVALID_DEVICE;

  ScopedContext Active(hContext);
  UR_CHECK_ERROR(cuMemSetAccess((CUdeviceptr)pStart, size, &AccessDesc, 1));
  return UR_RESULT_SUCCESS;
}

/// Maps `size` bytes of `hPhysicalMem`, starting at `offset`, to the virtual
/// address range beginning at `pStart`, and then applies `flags` as the access
/// protection when `flags` is non-zero.
///
/// \return UR_RESULT_SUCCESS on success, or the error returned by
///         urVirtualMemSetAccess if setting the access protection fails.
UR_APIEXPORT ur_result_t UR_APICALL
urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size,
                ur_physical_mem_handle_t hPhysicalMem, size_t offset,
                ur_virtual_mem_access_flags_t flags) {
  ScopedContext Active(hContext);
  UR_CHECK_ERROR(
      cuMemMap((CUdeviceptr)pStart, size, offset, hPhysicalMem->get(), 0));
  if (flags) {
    // urVirtualMemSetAccess returns a ur_result_t; compare it against
    // UR_RESULT_SUCCESS explicitly so that a failure is not mistaken for
    // success (and vice versa), and propagate the actual error to the caller.
    const ur_result_t AccessResult =
        urVirtualMemSetAccess(hContext, pStart, size, flags);
    if (AccessResult != UR_RESULT_SUCCESS)
      return AccessResult;
  }
  return UR_RESULT_SUCCESS;
}

/// Unmaps the virtual address range starting at `pStart` spanning `size` bytes
/// via cuMemUnmap.
///
/// \return UR_RESULT_SUCCESS when the unmap call is accepted.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(
    ur_context_handle_t hContext, const void *pStart, size_t size) {
  ScopedContext Active(hContext);

  const CUdeviceptr RangeStart = reinterpret_cast<CUdeviceptr>(pStart);
  UR_CHECK_ERROR(cuMemUnmap(RangeStart, size));

  return UR_RESULT_SUCCESS;
}

/// Queries information about the virtual memory address `pStart`.
///
/// Only UR_VIRTUAL_MEM_INFO_ACCESS_MODE is supported: the CUDA access flags
/// for the context's device are fetched with cuMemGetAccess and translated to
/// UR access flags (read-write, read-only, or 0 for anything else). `size` is
/// currently unused.
///
/// \return UR_RESULT_ERROR_INVALID_DEVICE if no ordinal could be determined
///         for the context's device, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
///         for any other `propName`, otherwise the result of the return
///         helper.
UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(
    ur_context_handle_t hContext, const void *pStart,
    [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName,
    size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

  ScopedContext Active(hContext);
  switch (propName) {
  case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: {
    CUmemLocation MemLocation = {};
    MemLocation.type = CU_MEM_LOCATION_TYPE_DEVICE;
    // GetDeviceOrdinal reports success as UR_RESULT_SUCCESS, so compare
    // against it explicitly rather than using the result as a condition.
    if (GetDeviceOrdinal(hContext->getDevice(), MemLocation.id) !=
        UR_RESULT_SUCCESS)
      return UR_RESULT_ERROR_INVALID_DEVICE;

    unsigned long long CuAccessFlags;
    UR_CHECK_ERROR(
        cuMemGetAccess(&CuAccessFlags, &MemLocation, (CUdeviceptr)pStart));

    ur_virtual_mem_access_flags_t UrAccessFlags = 0;
    if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READWRITE)
      UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE;
    else if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READ)
      UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY;
    return ReturnValue(UrAccessFlags);
  }
  default:
    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
  }
  return UR_RESULT_SUCCESS;
}
3 changes: 3 additions & 0 deletions source/adapters/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
Expand All @@ -73,6 +75,7 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
)
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
}
case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED:
return ReturnValue(false);
case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
return ReturnValue(false);
case UR_DEVICE_INFO_ESIMD_SUPPORT:
return ReturnValue(false);

Expand Down
36 changes: 36 additions & 0 deletions source/adapters/hip/physical_mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===--------- physical_mem.cpp - HIP Adapter -----------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "physical_mem.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"

/// Physical memory creation is not implemented for the HIP adapter.
///
/// Reports the missing feature through the return code instead of terminating
/// the process, so callers can handle it like any other unsupported entry
/// point.
///
/// \return Always UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
    ur_context_handle_t, ur_device_handle_t, size_t,
    const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Retaining physical memory is not implemented for the HIP adapter.
///
/// Reports the missing feature through the return code instead of terminating
/// the process.
///
/// \return Always UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRetain(ur_physical_mem_handle_t) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Releasing physical memory is not implemented for the HIP adapter.
///
/// Reports the missing feature through the return code instead of terminating
/// the process.
///
/// \return Always UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
UR_APIEXPORT ur_result_t UR_APICALL
urPhysicalMemRelease(ur_physical_mem_handle_t) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
30 changes: 30 additions & 0 deletions source/adapters/hip/physical_mem.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===---------- physical_mem.hpp - HIP Adapter ----------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include "common.hpp"
#include "device.hpp"
#include "platform.hpp"

/// UR physical memory handle for the HIP adapter, used in virtual memory
/// management. Currently it only carries a reference count.
/// TODO: Implement.
///
struct ur_physical_mem_handle_t_ {
  std::atomic_uint32_t RefCount;

  /// Starts with a reference count of one.
  ur_physical_mem_handle_t_() : RefCount{1} {}

  /// Increments the reference count and returns the new value.
  uint32_t incrementReferenceCount() noexcept {
    return RefCount.fetch_add(1) + 1;
  }

  /// Decrements the reference count and returns the new value.
  uint32_t decrementReferenceCount() noexcept {
    return RefCount.fetch_sub(1) - 1;
  }

  /// Returns the current reference count.
  uint32_t getReferenceCount() const noexcept { return RefCount.load(); }
};
Loading
Loading