Skip to content

Commit

Permalink
Build AS on first Apply()
Browse files Browse the repository at this point in the history
Then clone to captured AS.  This is much faster than building on each Apply().

Change-Id: I776d06d7a1908020e898b4552568bcc41c2d7509
  • Loading branch information
cmannett85-arm committed Oct 11, 2024
1 parent 97ac0f5 commit 0d103df
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 96 deletions.
223 changes: 131 additions & 92 deletions renderdoc/driver/vulkan/vk_acceleration_structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,18 +675,15 @@ bool VulkanAccelerationStructureManager::Serialise(SerialiserType &ser, Resource

if(IsReplayMode(state) && !ser.IsErrored())
{
uploadMemory = CreateReplayMemory(MemoryType::Upload, asInfo->memSize);
uploadMemory =
CreateReplayMemory(MemoryType::Upload, asInfo->memSize,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR);
if(uploadMemory.mem == VK_NULL_HANDLE)
{
RDCERR("Failed to allocate AS build data upload buffer");
return false;
}

m_pDriver->AddPendingObjectCleanup([d, uploadMemory]() {
ObjDisp(d)->DestroyBuffer(Unwrap(d), uploadMemory.buf, NULL);
ObjDisp(d)->FreeMemory(Unwrap(d), uploadMemory.mem, NULL);
});

vkr = ObjDisp(d)->MapMemory(Unwrap(d), uploadMemory.mem, 0,
AlignUp(asInfo->memSize, nonCoherentAtomSize), 0,
(void **)&contents);
Expand Down Expand Up @@ -722,48 +719,8 @@ bool VulkanAccelerationStructureManager::Serialise(SerialiserType &ser, Resource

ObjDisp(d)->UnmapMemory(Unwrap(d), uploadMemory.mem);

// Allocate GPU memory and copy the AS input upload data into it
const VkBufferCreateInfo gpuBufInfo = {
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
NULL,
0,
asInfo->memSize,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
};

VkBuffer gpuBuf = VK_NULL_HANDLE;
vkr = m_pDriver->vkCreateBuffer(d, &gpuBufInfo, NULL, &gpuBuf);
CHECK_VKR(m_pDriver, vkr);

const MemoryAllocation gpuMemory = m_pDriver->AllocateMemoryForResource(
gpuBuf, MemoryScope::InitialContents, MemoryType::GPULocal);
if(gpuMemory.mem == VK_NULL_HANDLE)
{
RDCERR("Failed to allocate AS build data GPU buffer");
return false;
}

vkr = ObjDisp(d)->BindBufferMemory(Unwrap(d), Unwrap(gpuBuf), Unwrap(gpuMemory.mem),
gpuMemory.offs);
CHECK_VKR(m_pDriver, vkr);

VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
if(cmd == VK_NULL_HANDLE)
{
RDCERR("Couldn't acquire command buffer");
return false;
}

VkBufferCopy region = {
0,
0,
AlignUp(asInfo->memSize, nonCoherentAtomSize),
};
ObjDisp(d)->CmdCopyBuffer(Unwrap(cmd), uploadMemory.buf, Unwrap(gpuBuf), 1, &region);

asInfo->replayBuf = gpuBuf;
asInfo->uploadMem = uploadMemory.mem;
asInfo->uploadBuf = uploadMemory.buf;
}

SERIALISE_CHECK_READ_ERRORS();
Expand Down Expand Up @@ -792,81 +749,158 @@ template bool VulkanAccelerationStructureManager::Serialise(ReadSerialiser &ser,
const VkInitialContents *initial,
CaptureState state);

void VulkanAccelerationStructureManager::Apply(ResourceId id, const VkInitialContents &initial)
void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents &initial)
{
const VkAccelerationStructureKHR wrappedAS =
m_pDriver->GetResourceManager()->GetCurrentHandle<VkAccelerationStructureKHR>(id);
VkAccelerationStructureInfo *asInfo = initial.accelerationStructureInfo;
RDCASSERT(asInfo);

rdcarray<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos = asInfo->getBuildRanges();
rdcarray<VkAccelerationStructureGeometryKHR> asGeomData = asInfo->convertGeometryData();
RDCASSERT(!asGeomData.empty());
RDCASSERT(asInfo->geometryData.size() == asGeomData.size());

VkCommandBuffer cmd;
const VkDevice d = m_pDriver->GetDev();

if(!FixUpReplayBDAs(asInfo, asGeomData))
return;

// Allocate the scratch buffer which involves working out how big it should be
VkAccelerationStructureBuildSizesInfoKHR sizeResult = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
};
// If our 'base' AS has not been created yet, build it now
if(asInfo->replayAS == VK_NULL_HANDLE)
{
const VkAccelerationStructureBuildGeometryInfoKHR sizeInfo = {
rdcarray<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos = asInfo->getBuildRanges();
rdcarray<VkAccelerationStructureGeometryKHR> asGeomData = asInfo->convertGeometryData();
RDCASSERT(!asGeomData.empty());
RDCASSERT(asInfo->geometryData.size() == asGeomData.size());

if(!FixUpReplayBDAs(asInfo, asGeomData))
return;

// Allocate the scratch buffer which involves working out how big it should be
VkAccelerationStructureBuildSizesInfoKHR sizeResult = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
};
{
const VkAccelerationStructureBuildGeometryInfoKHR sizeInfo = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
NULL,
asInfo->type,
asInfo->flags,
VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
VK_NULL_HANDLE,
VK_NULL_HANDLE,
(uint32_t)asGeomData.size(),
asGeomData.data(),
VK_NULL_HANDLE,
};

rdcarray<uint32_t> counts;
counts.reserve(asGeomData.size());
for(VkAccelerationStructureBuildRangeInfoKHR numPrims : buildRangeInfos)
counts.push_back(numPrims.primitiveCount);

ObjDisp(d)->GetAccelerationStructureBuildSizesKHR(
Unwrap(d), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &sizeInfo, counts.data(),
&sizeResult);
}
UpdateScratch(sizeResult.buildScratchSize);

cmd = m_pDriver->GetInitStateCmd();
if(cmd == VK_NULL_HANDLE)
{
RDCERR("Couldn't acquire command buffer");
return;
}

// Create the base AS
const VkBufferCreateInfo gpuBufInfo = {
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
NULL,
0,
sizeResult.accelerationStructureSize,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR,
};

VkBuffer asBuf = VK_NULL_HANDLE;
VkResult vkr = m_pDriver->vkCreateBuffer(d, &gpuBufInfo, NULL, &asBuf);
CHECK_VKR(m_pDriver, vkr);

VkMemoryRequirements mrq = {};
ObjDisp(d)->GetBufferMemoryRequirements(Unwrap(d), Unwrap(asBuf), &mrq);
mrq.alignment = AlignUp(mrq.alignment, asBufferAlignment);

const MemoryAllocation asMemory = m_pDriver->AllocateMemoryForResource(
true, mrq, MemoryScope::InitialContents, MemoryType::GPULocal);
vkr = m_pDriver->vkBindBufferMemory(d, asBuf, asMemory.mem, asMemory.offs);
CHECK_VKR(m_pDriver, vkr);

const VkAccelerationStructureCreateInfoKHR asCreateInfo = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
NULL,
0,
asBuf,
0,
sizeResult.accelerationStructureSize,
asInfo->type,
0x0,
};
m_pDriver->vkCreateAccelerationStructureKHR(d, &asCreateInfo, NULL, &asInfo->replayAS);

// Build the AS
const VkAccelerationStructureBuildGeometryInfoKHR asGeomInfo = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
NULL,
asInfo->type,
asInfo->flags,
VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
VK_NULL_HANDLE,
VK_NULL_HANDLE,
Unwrap(asInfo->replayAS),
(uint32_t)asGeomData.size(),
asGeomData.data(),
VK_NULL_HANDLE,
NULL,
scratchAddressUnion,
};

rdcarray<uint32_t> counts;
counts.reserve(asGeomData.size());
for(VkAccelerationStructureBuildRangeInfoKHR numPrims : buildRangeInfos)
counts.push_back(numPrims.primitiveCount);
const VkAccelerationStructureBuildRangeInfoKHR *pBuildInfo = buildRangeInfos.data();
ObjDisp(d)->CmdBuildAccelerationStructuresKHR(Unwrap(cmd), 1, &asGeomInfo, &pBuildInfo);

ObjDisp(d)->GetAccelerationStructureBuildSizesKHR(
Unwrap(d), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &sizeInfo, counts.data(),
&sizeResult);
m_pDriver->AddPendingObjectCleanup(
[d, uploadMem = asInfo->uploadMem, uploadBuf = asInfo->uploadBuf]() {
ObjDisp(d)->DestroyBuffer(Unwrap(d), uploadBuf, NULL);
ObjDisp(d)->FreeMemory(Unwrap(d), uploadMem, NULL);
});

// Make sure the AS builds are serialised as the scratch mem is shared
VkMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
NULL,
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
};
ObjDisp(d)->CmdPipelineBarrier(Unwrap(cmd),
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1,
&barrier, 0, VK_NULL_HANDLE, 0, VK_NULL_HANDLE);
}
UpdateScratch(sizeResult.buildScratchSize);

// Build the AS
VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
cmd = m_pDriver->GetInitStateCmd();
if(cmd == VK_NULL_HANDLE)
{
RDCERR("Couldn't acquire command buffer");
return;
}

const VkAccelerationStructureBuildGeometryInfoKHR asGeomInfo = {
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
// Copy the base AS to the captured one to reset it
const VkCopyAccelerationStructureInfoKHR asCopyInfo = {
VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,
NULL,
asInfo->type,
asInfo->flags,
VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
VK_NULL_HANDLE,
Unwrap(asInfo->replayAS),
Unwrap(wrappedAS),
(uint32_t)asGeomData.size(),
asGeomData.data(),
NULL,
scratchAddressUnion,
VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR,
};
ObjDisp(d)->CmdCopyAccelerationStructureKHR(Unwrap(cmd), &asCopyInfo);

const VkAccelerationStructureBuildRangeInfoKHR *pBuildInfo = buildRangeInfos.data();
ObjDisp(d)->CmdBuildAccelerationStructuresKHR(Unwrap(cmd), 1, &asGeomInfo, &pBuildInfo);

// We serialise the AS and OMM builds so we can have just a single scratch buffer and reuse it
m_pDriver->CloseInitStateCmd();
m_pDriver->SubmitCmds();
m_pDriver->FlushQ();
if(Vulkan_Debug_SingleSubmitFlushing())
{
m_pDriver->CloseInitStateCmd();
m_pDriver->SubmitCmds();
m_pDriver->FlushQ();
}
}

VulkanAccelerationStructureManager::Allocation VulkanAccelerationStructureManager::CreateReadBackMemory(
Expand Down Expand Up @@ -1006,7 +1040,7 @@ bool VulkanAccelerationStructureManager::FixUpReplayBDAs(
const VkDevice d = m_pDriver->GetDev();

const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL,
Unwrap(asInfo->replayBuf)};
asInfo->uploadBuf};
const VkDeviceAddress bufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo);

for(size_t i = 0; i < geoms.size(); ++i)
Expand Down Expand Up @@ -1074,6 +1108,11 @@ void VulkanAccelerationStructureManager::UpdateScratch(VkDeviceSize requiredSize
ObjDisp(d)->DestroyBuffer(Unwrap(d), tmp.buf, NULL);
ObjDisp(d)->FreeMemory(Unwrap(d), tmp.mem, NULL);
});

RDCDEBUG("AS build shared scratch changed to size %llu, flushing", requiredSize);
m_pDriver->CloseInitStateCmd();
m_pDriver->SubmitCmds();
m_pDriver->FlushQ();
}

scratch =
Expand Down
9 changes: 5 additions & 4 deletions renderdoc/driver/vulkan/vk_acceleration_structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ struct VkAccelerationStructureInfo
VkDeviceMemory readbackMem = VK_NULL_HANDLE;
VkDeviceSize memSize = 0;

VkBuffer replayBuf = VK_NULL_HANDLE;
VkDeviceMemory uploadMem = VK_NULL_HANDLE;
VkBuffer uploadBuf = VK_NULL_HANDLE;
VkAccelerationStructureKHR replayAS = VK_NULL_HANDLE;

bool accelerationStructureBuilt = false;

Expand Down Expand Up @@ -110,9 +112,8 @@ class VulkanAccelerationStructureManager
bool Serialise(SerialiserType &ser, ResourceId id, const VkInitialContents *initial,
CaptureState state);

// Called when the initial state is applied. The AS data is deserialised from the upload buffer
// into the acceleration structure
void Apply(ResourceId id, const VkInitialContents &initial);
// Called when the initial state is applied.
void Apply(ResourceId id, VkInitialContents &initial);

private:
struct Allocation
Expand Down

0 comments on commit 0d103df

Please sign in to comment.