[L0] Add initial USM alloc enqueue API
Co-authored-by: Michał Staniewski <michal.staniewski@intel.com>
kswiecicki and staniewzki committed Nov 7, 2024
1 parent af9b768 commit afc2322
Showing 6 changed files with 424 additions and 95 deletions.
43 changes: 43 additions & 0 deletions source/adapters/level_zero/context.cpp
@@ -201,6 +201,17 @@ ur_result_t ur_context_handle_t_::initialize() {
.Configs[usm::DisjointPoolMemType::Device])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
AsyncDeviceMemPools.emplace(
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
std::make_tuple(umf::poolMakeUniqueFromOps(
umfDisjointPoolOps(), std::move(MemProvider),
&DisjointPoolConfigInstance
.Configs[usm::DisjointPoolMemType::Device])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
@@ -212,6 +223,17 @@ ur_result_t ur_context_handle_t_::initialize() {
.Configs[usm::DisjointPoolMemType::Shared])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
AsyncSharedMemPools.emplace(
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
std::make_tuple(umf::poolMakeUniqueFromOps(
umfDisjointPoolOps(), std::move(MemProvider),
&DisjointPoolConfigInstance
.Configs[usm::DisjointPoolMemType::Shared])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
@@ -224,6 +246,18 @@ ur_result_t ur_context_handle_t_::initialize() {
.Configs[usm::DisjointPoolMemType::SharedReadOnly])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
AsyncSharedReadOnlyMemPools.emplace(
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
std::make_tuple(
umf::poolMakeUniqueFromOps(
umfDisjointPoolOps(), std::move(MemProvider),
&DisjointPoolConfigInstance
.Configs[usm::DisjointPoolMemType::SharedReadOnly])
.second));

MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), Device)
.second;
@@ -276,6 +310,15 @@ ur_result_t ur_context_handle_t_::initialize() {
&DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host])
.second;

MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), nullptr)
.second;
AsyncHostMemPool =
umf::poolMakeUniqueFromOps(
umfDisjointPoolOps(), std::move(MemProvider),
&DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host])
.second;

MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
reinterpret_cast<ur_context_handle_t>(this), nullptr)
.second;
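The four hunks above repeat one pattern: build a Level Zero memory provider for the (context, device) pair, wrap it in a UMF disjoint pool, and store it keyed by the native device handle. A hypothetical refactoring sketch of that pattern, not part of this commit and assuming the adapter's existing headers and types, could look like:

```cpp
// Illustration only -- this helper does not exist in the commit; it just
// distills the repeated provider-plus-disjoint-pool creation shown above.
template <typename ProviderT, typename PoolMap, typename PoolConfig>
static void makeAsyncPool(ur_context_handle_t Context,
                          ur_device_handle_t Device, PoolMap &Pools,
                          PoolConfig &Config) {
  // Create the Level Zero backed memory provider for this context/device.
  auto Provider =
      umf::memoryProviderMakeUnique<ProviderT>(Context, Device).second;
  // Wrap it in a UMF disjoint pool and key it by the native device handle.
  Pools.emplace(std::piecewise_construct, std::make_tuple(Device->ZeDevice),
                std::make_tuple(umf::poolMakeUniqueFromOps(
                                    umfDisjointPoolOps(), std::move(Provider),
                                    &Config)
                                    .second));
}

// e.g., inside ur_context_handle_t_::initialize():
// makeAsyncPool<L0DeviceMemoryProvider>(
//     reinterpret_cast<ur_context_handle_t>(this), Device, AsyncDeviceMemPools,
//     DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Device]);
```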
9 changes: 9 additions & 0 deletions source/adapters/level_zero/context.hpp
@@ -124,6 +124,15 @@ struct ur_context_handle_t_ : _ur_object {
SharedReadOnlyMemProxyPools;
umf::pool_unique_handle_t HostMemProxyPool;

// USM pools for async allocations.
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
AsyncDeviceMemPools;
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
AsyncSharedMemPools;
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
AsyncSharedReadOnlyMemPools;
umf::pool_unique_handle_t AsyncHostMemPool;

// Map associating pools created with urUsmPoolCreate and internal pools
std::list<ur_usm_pool_handle_t> UsmPoolHandles{};

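A minimal sketch of how these new members might be consumed: the helper name below is hypothetical and not part of this commit, while umfPoolMalloc is UMF's standard pool allocation call.

```cpp
// Hypothetical helper (illustration only): service a device allocation from
// the per-device async pool declared in ur_context_handle_t_ above.
void *allocFromAsyncDevicePool(ur_context_handle_t_ *Context,
                               ze_device_handle_t ZeDevice, size_t Size) {
  auto It = Context->AsyncDeviceMemPools.find(ZeDevice);
  if (It == Context->AsyncDeviceMemPools.end())
    return nullptr; // initialize() created no async pool for this device
  // Allocate from the UMF pool owned by the unique handle stored in the map.
  return umfPoolMalloc(It->second.get(), Size);
}
```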
2 changes: 1 addition & 1 deletion source/adapters/level_zero/queue.cpp
@@ -1333,7 +1333,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
Device->Platform->ContextsMutex, std::defer_lock);

if (IndirectAccessTrackingEnabled) {
// We are going to submit kernels for execution. If indirect access flag is
// We are going to submit kernels for execution. If indirect access flag is
// set for a kernel then we need to make a snapshot of existing memory
// allocations in all contexts in the platform. We need to lock the mutex
// guarding the list of contexts in the platform to prevent creation of new
(The remaining three changed files did not render on this page.)
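The enqueue entry points that actually draw from these pools live in the files not shown above. One point the free path can rely on: UMF tracks which pool owns a pointer, so returning an async allocation does not require searching the Async*MemPools maps. A rough sketch, assuming UMF's umfPoolByPtr/umfPoolFree and a hypothetical helper name:

```cpp
// Hypothetical helper, not part of this commit: return memory obtained from
// any of the async pools. umfPoolByPtr resolves the owning pool from the
// pointer itself, so no per-device map lookup is needed.
ur_result_t freeAsyncAllocSketch(void *Ptr) {
  umf_memory_pool_handle_t Pool = umfPoolByPtr(Ptr);
  if (!Pool)
    return UR_RESULT_ERROR_INVALID_MEM_OBJECT; // not a UMF-pooled allocation
  return umfPoolFree(Pool, Ptr) == UMF_RESULT_SUCCESS
             ? UR_RESULT_SUCCESS
             : UR_RESULT_ERROR_UNKNOWN;
}
```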
