Skip to content

Commit

Permalink
[VK] Implemented DrawStreamOutput() interface in Vulkan backend.
Browse files Browse the repository at this point in the history
- Added VK_EXT_transform_feedback extension.
- Added separate shaders with XFB layout attributes to StreamOutput test to provide differences in SPIR-V for transform-feedback streams.

NOTE: Added workaround for bizarre behavior of vkGetQueryPoolResults() that would corrupt memory by writing a 64-bit value when a 32-bit value was requested.
  • Loading branch information
LukasBanana committed Oct 11, 2024
1 parent 62a600c commit b0a44d1
Show file tree
Hide file tree
Showing 30 changed files with 474 additions and 82 deletions.
25 changes: 23 additions & 2 deletions sources/Renderer/Vulkan/Buffer/VKBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ namespace LLGL
{


/* Size (in bytes) of the 32-bit transform-feedback counter that is appended to buffers created with the StreamOutputBuffer bind flag */
static constexpr VkDeviceSize k_xfbCounterSize = sizeof(std::uint32_t);

static VkBufferUsageFlags GetVkBufferUsageFlags(const BufferDescriptor& desc)
{
VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
Expand All @@ -40,6 +42,8 @@ static VkBufferUsageFlags GetVkBufferUsageFlags(const BufferDescriptor& desc)
{
/* Enable transform feedback with extension VK_EXT_transform_feedback */
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
}
else
{
Expand Down Expand Up @@ -76,12 +80,19 @@ static VkAccessFlags GetBufferVkAccessFlags(long bindFlags)
return accessFlags;
}

/*
Determines the element stride of a buffer from its descriptor.
Only the first vertex attribute is inspected, since all attributes must have equal strides within the same buffer.
The returned stride is never zero: without any vertex attributes (or with a zero stride) the result is 1.
*/
static std::uint32_t GetVKBufferStride(const BufferDescriptor& desc)
{
    /* No vertex attributes means no stride information, so fall back to the minimum of 1 */
    if (desc.vertexAttribs.empty())
        return 1;

    /* Clamp the stride of the first attribute to a lower bound of 1 */
    const std::uint32_t firstAttribStride = desc.vertexAttribs[0].stride;
    return (firstAttribStride > 1u ? firstAttribStride : 1u);
}

VKBuffer::VKBuffer(VkDevice device, const BufferDescriptor& desc) :
Buffer { desc.bindFlags },
bufferObj_ { device },
bufferObjStaging_ { device },
size_ { desc.size },
accessFlags_ { GetBufferVkAccessFlags(desc.bindFlags) }
accessFlags_ { GetBufferVkAccessFlags(desc.bindFlags) },
stride_ { GetVKBufferStride(desc) }
{
if ((desc.bindFlags & BindFlags::IndexBuffer) != 0)
indexType_ = VKTypes::ToVkIndexType(desc.format);
Expand All @@ -91,7 +102,7 @@ VKBuffer::VKBuffer(VkDevice device, const BufferDescriptor& desc) :
createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
createInfo.pNext = nullptr;
createInfo.flags = 0;
createInfo.size = desc.size;
createInfo.size = GetInternalSize();
createInfo.usage = GetVkBufferUsageFlags(desc);
createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
createInfo.queueFamilyIndexCount = 0;
Expand Down Expand Up @@ -163,6 +174,16 @@ void VKBuffer::Unmap(VKDevice& device)
}
}

/*
Returns the actual size (in bytes) of the native buffer.
For stream-output buffers this is larger than GetSize(), because the transform-feedback counter
is stored behind the user data at the offset returned by GetXfbCounterOffset().
For all other buffers this is equal to GetSize().
*/
VkDeviceSize VKBuffer::GetInternalSize() const
{
    if ((GetBindFlags() & BindFlags::StreamOutputBuffer) != 0)
        return GetXfbCounterOffset() + k_xfbCounterSize;
    return GetSize();
}

/*
Returns the byte offset of the transform-feedback counter within this buffer, or 0 if the buffer has no such counter.
The offset is rounded up to a multiple of 4 bytes, because Vulkan requires the counter buffer offset passed to
vkCmdDrawIndirectByteCountEXT to be 4-byte aligned and GetSize() is not guaranteed to be.
For sizes that are already a multiple of 4, the offset is equal to GetSize().
*/
VkDeviceSize VKBuffer::GetXfbCounterOffset() const
{
    if ((GetBindFlags() & BindFlags::StreamOutputBuffer) != 0)
    {
        constexpr VkDeviceSize counterAlignment = 4;
        const VkDeviceSize userSize = static_cast<VkDeviceSize>(GetSize());
        return ((userSize + (counterAlignment - 1)) / counterAlignment) * counterAlignment;
    }
    return 0;
}


} // /namespace LLGL

Expand Down
14 changes: 14 additions & 0 deletions sources/Renderer/Vulkan/Buffer/VKBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ class VKBuffer : public Buffer
void* Map(VKDevice& device, const CPUAccess access, VkDeviceSize offset, VkDeviceSize length);
void Unmap(VKDevice& device);

// Returns the actual size of this buffer.
// This might be larger than GetSize() if the buffer has additional payload such as the transform-feedback counter.
VkDeviceSize GetInternalSize() const;

// Returns the offset to the transform-feedback counter within this buffer or 0 if there is no such counter.
VkDeviceSize GetXfbCounterOffset() const;

// Returns the device buffer object.
inline VKDeviceBuffer& GetDeviceBuffer()
{
Expand Down Expand Up @@ -91,6 +98,12 @@ class VKBuffer : public Buffer
return accessFlags_;
}

// Returns the element stride this buffer was created with. Currently only used for vertex buffers.
inline std::uint32_t GetStride() const
{
return stride_;
}

private:

VKDeviceBuffer bufferObj_;
Expand All @@ -102,6 +115,7 @@ class VKBuffer : public Buffer
VkIndexType indexType_ = VK_INDEX_TYPE_MAX_ENUM;

VkAccessFlags accessFlags_ = 0;
std::uint32_t stride_ = 0;

};

Expand Down
2 changes: 1 addition & 1 deletion sources/Renderer/Vulkan/Buffer/VKDeviceBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ VKDeviceBuffer& VKDeviceBuffer::operator = (VKDeviceBuffer&& rhs) noexcept
/*
Creates the native Vulkan buffer from the specified create-info and queries its memory requirements.
Any buffer previously held by this object is released first (via ReleaseAndGetAddressOf).
Failure of vkCreateBuffer is reported through VKThrowIfFailed.
*/
void VKDeviceBuffer::CreateVkBuffer(VkDevice device, const VkBufferCreateInfo& createInfo)
{
    /* Create Vulkan buffer object exactly once; a second creation would only replace the first buffer again */
    const VkResult result = vkCreateBuffer(device, &createInfo, nullptr, buffer_.ReleaseAndGetAddressOf());
    VKThrowIfFailed(result, "failed to create Vulkan buffer");

    /* Query memory requirements of the new buffer */
    vkGetBufferMemoryRequirements(device, buffer_, &requirements_);
}
Expand Down
61 changes: 45 additions & 16 deletions sources/Renderer/Vulkan/Command/VKCommandBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,14 @@ void VKCommandBuffer::SetVertexBuffer(Buffer& buffer)
VkDeviceSize offsets[] = { 0 };

vkCmdBindVertexBuffers(commandBuffer_, 0, 1, buffers, offsets);

/* Store input-assembly state for slot 0 in case it's used for stream-output */
if ((bufferVK.GetBindFlags() & BindFlags::StreamOutputBuffer) != 0)
{
iaState_.ia0VertexStride = bufferVK.GetStride();
iaState_.ia0XfbCounterBuffer = bufferVK.GetVkBuffer();
iaState_.ia0XfbCounterBufferOffset = bufferVK.GetXfbCounterOffset();
}
}

void VKCommandBuffer::SetVertexBufferArray(BufferArray& bufferArray)
Expand Down Expand Up @@ -925,32 +933,50 @@ void VKCommandBuffer::EndRenderCondition()

/* ----- Stream Output ------ */

#if 0
void VKCommandBuffer::SetStreamOutputBuffer(Buffer& buffer)
void VKCommandBuffer::BeginStreamOutput(std::uint32_t numBuffers, Buffer* const * buffers)
{
LLGL_ASSERT_VK_EXT(EXT_transform_feedback);

auto& bufferVK = LLGL_CAST(VKBuffer&, buffer);
/* Get native Vulkan transform-feedback buffers */
VkDeviceSize xfbOffsets[LLGL_MAX_NUM_SO_BUFFERS];
VkDeviceSize xfbSizes[LLGL_MAX_NUM_SO_BUFFERS];

VkBuffer buffers[] = { bufferVK.GetVkBuffer() };
VkDeviceSize offsets[] = { 0 };
VkDeviceSize sizes[] = { bufferVK.GetSize() };
xfbState_.numXfbBuffers = std::min<std::uint32_t>(numBuffers, LLGL_MAX_NUM_SO_BUFFERS);

vkCmdBindTransformFeedbackBuffersEXT(commandBuffer_, 0, 1, buffers, offsets, sizes);
}
#endif
for_range(i, xfbState_.numXfbBuffers)
{
VKBuffer* bufferVK = LLGL_CAST(VKBuffer*, buffers[i]);
xfbState_.xfbBuffers[i] = bufferVK->GetVkBuffer();
xfbState_.xfbCounterOffsets[i] = bufferVK->GetXfbCounterOffset();
xfbOffsets[i] = 0;
xfbSizes[i] = bufferVK->GetSize();
}

void VKCommandBuffer::BeginStreamOutput(std::uint32_t numBuffers, Buffer* const * buffers)
{
LLGL_ASSERT_VK_EXT(EXT_transform_feedback);
//TODO: bind buffers
/* Bind transform-feedback buffers and start recording stream-outpuits */
vkCmdBindTransformFeedbackBuffersEXT(commandBuffer_, 0, xfbState_.numXfbBuffers, xfbState_.xfbBuffers, xfbOffsets, xfbSizes);
vkCmdBeginTransformFeedbackEXT(commandBuffer_, 0, 0, nullptr, nullptr);
}

/*
Ends transform feedback for the buffers that were bound in BeginStreamOutput().
The final counter value of each buffer is written at its counter offset,
and a buffer barrier is recorded for each counter range.
*/
void VKCommandBuffer::EndStreamOutput()
{
    LLGL_ASSERT_VK_EXT(EXT_transform_feedback);

    /*
    End transform-feedback exactly once and specify counter buffers here to write the final counter values.
    Ending it a second time would be invalid, since transform feedback is no longer active after the first call.
    */
    vkCmdEndTransformFeedbackEXT(commandBuffer_, 0, xfbState_.numXfbBuffers, xfbState_.xfbBuffers, xfbState_.xfbCounterOffsets);

    /* Ensure transform-feedback counter values are accessible in subsequent DrawStreamOutput() commands */
    for_range(i, xfbState_.numXfbBuffers)
    {
        BufferPipelineBarrier(
            xfbState_.xfbBuffers[i],
            xfbState_.xfbCounterOffsets[i],
            sizeof(std::uint32_t),
            VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
            VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
            VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
            VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
        );
    }
}

/* ----- Drawing ----- */
Expand Down Expand Up @@ -1057,8 +1083,9 @@ void VKCommandBuffer::DrawIndexedIndirect(Buffer& buffer, std::uint64_t offset,

/*
Draws the primitives that were previously captured via stream-output.
The vertex count is not specified by the caller; it is derived on the device from the transform-feedback counter
of the buffer that was last bound through SetVertexBuffer() with the StreamOutputBuffer bind flag.
*/
void VKCommandBuffer::DrawStreamOutput()
{
    LLGL_ASSERT_VK_EXT(EXT_transform_feedback);

    /* Flush descriptor cache before the draw command is recorded */
    FlushDescriptorCache();

    /* Single instance, starting at instance 0, with no additional offset applied to the counter value */
    constexpr std::uint32_t instanceCount = 1;
    constexpr std::uint32_t firstInstance = 0;
    constexpr std::uint32_t counterOffset = 0;

    vkCmdDrawIndirectByteCountEXT(
        commandBuffer_,
        instanceCount,
        firstInstance,
        iaState_.ia0XfbCounterBuffer,
        iaState_.ia0XfbCounterBufferOffset,
        counterOffset,
        iaState_.ia0VertexStride
    );
}

/* ----- Compute ----- */
Expand Down Expand Up @@ -1332,6 +1359,7 @@ void VKCommandBuffer::ResetBindingStates()
descriptorCache_ = nullptr;
}

#if 0
void VKCommandBuffer::ResetQueryPoolsInFlight()
{
for_range(i, numQueryHeapsInFlight_)
Expand All @@ -1355,6 +1383,7 @@ void VKCommandBuffer::AppendQueryPoolInFlight(VKQueryHeap* queryHeap)
queryHeapsInFlight_[numQueryHeapsInFlight_] = queryHeap;
++numQueryHeapsInFlight_;
}
#endif

std::uint32_t VKCommandBuffer::GetNumVkCommandBuffers(const CommandBufferDescriptor& desc)
{
Expand Down
22 changes: 21 additions & 1 deletion sources/Renderer/Vulkan/Command/VKCommandBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ class VKCommandBuffer final : public CommandBuffer
// Returns the number of native Vulkan command buffers used for the specified descriptor.
static std::uint32_t GetNumVkCommandBuffers(const CommandBufferDescriptor& desc);

private:

// Input-assembly state that is recorded in SetVertexBuffer() and consumed by DrawStreamOutput().
// It is only updated when the bound vertex buffer was created with the StreamOutputBuffer bind flag.
struct InputAssemblyState
{
// Input-assembly state for slot 0 only (IA0)
// Native buffer that contains the transform-feedback counter.
VkBuffer ia0XfbCounterBuffer = VK_NULL_HANDLE;
// Byte offset of the transform-feedback counter within 'ia0XfbCounterBuffer'.
VkDeviceSize ia0XfbCounterBufferOffset = 0;
// Vertex stride of the buffer; passed as vertex stride to the stream-output draw command.
std::uint32_t ia0VertexStride = 0;
};

// Transform-feedback state that is recorded in BeginStreamOutput() and consumed by EndStreamOutput().
struct TransformFeedbackState
{
// Native handles of the currently bound transform-feedback buffers.
VkBuffer xfbBuffers[LLGL_MAX_NUM_SO_BUFFERS] = {};
// Byte offset of the transform-feedback counter within each buffer of 'xfbBuffers'.
VkDeviceSize xfbCounterOffsets[LLGL_MAX_NUM_SO_BUFFERS] = {};
// Number of valid entries in 'xfbBuffers' and 'xfbCounterOffsets'; never larger than LLGL_MAX_NUM_SO_BUFFERS.
std::uint32_t numXfbBuffers = 0;
};

private:

static constexpr std::uint32_t maxNumCommandBuffers = 3;
Expand Down Expand Up @@ -183,7 +200,10 @@ class VKCommandBuffer final : public CommandBuffer
VKDescriptorCache* descriptorCache_ = nullptr;
VKDescriptorSetWriter descriptorSetWriter_;

// State shared between SetVertexBuffer()/DrawStreamOutput() and BeginStreamOutput()/EndStreamOutput()
InputAssemblyState iaState_;
TransformFeedbackState xfbState_;

// Tracking of query heaps in flight is disabled; exactly one conditional opens this region, matching the single #endif
#if 0//TODO: optimize usage of query pools
std::vector<VKQueryHeap*> queryHeapsInFlight_;
std::size_t numQueryHeapsInFlight_ = 0;
#endif
Expand Down
20 changes: 12 additions & 8 deletions sources/Renderer/Vulkan/Command/VKCommandQueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ bool VKCommandQueue::QueryResult(
auto& queryHeapVK = LLGL_CAST(VKQueryHeap&, queryHeap);

/* Store result directly into output parameter */
auto stateResult = GetQueryResults(queryHeapVK, firstQuery, numQueries, data, dataSize);
VkResult stateResult = GetQueryResults(queryHeapVK, firstQuery, numQueries, data, dataSize);
if (stateResult == VK_NOT_READY)
return false;

Expand Down Expand Up @@ -148,9 +148,10 @@ VkResult VKCommandQueue::GetQueryResults(
else
return VK_ERROR_VALIDATION_FAILED_EXT;

if (queryHeapVK.GetType() == QueryType::TimeElapsed)
/* NOTE: vkGetQueryPoolResults() seems to disregard 32-bit requests and corrupts memory, so we always query with VK_QUERY_RESULT_64_BIT */
if (queryHeapVK.GetType() == QueryType::TimeElapsed || stride != sizeof(std::uint64_t))
{
/* Get elapsed time values from difference between start and end timestamps */
/* Query results individually */
auto dataByteAligned = reinterpret_cast<std::uint8_t*>(data);

for (std::uint32_t query = firstQuery; query < firstQuery + numQueries; ++query)
Expand Down Expand Up @@ -179,6 +180,7 @@ VkResult VKCommandQueue::GetQueryBatchedResults(
VkDeviceSize stride,
VkQueryResultFlags flags)
{
/* Use output buffer directly to store query result */
return vkGetQueryPoolResults(
device_,
queryHeapVK.GetVkQueryPool(),
Expand Down Expand Up @@ -235,17 +237,19 @@ VkResult VKCommandQueue::GetQuerySingleResult(
}
else
{
    /* NOTE: vkGetQueryPoolResults() seems to disregard 32-bit requests and corrupts memory, so we always query with 64-bit values */
    std::uint64_t intermediateResult = 0;
    result = vkGetQueryPoolResults(
        device_,
        queryHeapVK.GetVkQueryPool(),
        query,
        1,                          // Query a single result only
        sizeof(intermediateResult), // Size of the output storage in bytes (not the value stored in it)
        &intermediateResult,
        0,                          // Stride is irrelevant for a single query
        flags
    );

    /* Narrow the 64-bit intermediate value to the 32-bit output the caller provided */
    reinterpret_cast<std::uint32_t*>(data)[0] = static_cast<std::uint32_t>(intermediateResult);
}

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ VKPredicateQueryHeap::VKPredicateQueryHeap(
VKDeviceMemoryManager& deviceMemoryManager,
const QueryHeapDescriptor& desc)
:
VKQueryHeap { device, desc },
VKQueryHeap { device, desc, true },
resultBuffer_ { device },
memoryMngr_ { deviceMemoryManager }
{
Expand Down
6 changes: 3 additions & 3 deletions sources/Renderer/Vulkan/RenderState/VKQueryHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ static VkQueryControlFlags GetQueryControlFlags(const QueryHeapDescriptor& desc)
return flags;
}

VKQueryHeap::VKQueryHeap(VkDevice device, const QueryHeapDescriptor& desc) :
VKQueryHeap::VKQueryHeap(VkDevice device, const QueryHeapDescriptor& desc, bool hasPredicates) :
QueryHeap { desc.type },
hasPredicates_ { hasPredicates },
queryPool_ { device, vkDestroyQueryPool },
controlFlags_ { GetQueryControlFlags(desc) },
groupSize_ { GetQueryGroupSize(desc) },
numQueries_ { desc.numQueries * groupSize_ },
hasPredicates_ { desc.renderCondition }
numQueries_ { desc.numQueries * groupSize_ }
{
/* Create query pool object */
VkQueryPoolCreateInfo createInfo;
Expand Down
4 changes: 2 additions & 2 deletions sources/Renderer/Vulkan/RenderState/VKQueryHeap.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class VKQueryHeap : public QueryHeap

public:

VKQueryHeap(VkDevice device, const QueryHeapDescriptor& desc);
VKQueryHeap(VkDevice device, const QueryHeapDescriptor& desc, bool hasPredicates = false);

// Returns the Vulkan VkQueryPool object.
inline VkQueryPool GetVkQueryPool() const
Expand Down Expand Up @@ -60,9 +60,9 @@ class VKQueryHeap : public QueryHeap

VKPtr<VkQueryPool> queryPool_;
VkQueryControlFlags controlFlags_ = 0;
const bool hasPredicates_ = false;
std::uint32_t groupSize_ = 1;
std::uint32_t numQueries_ = 0;
bool hasPredicates_ = false;

};

Expand Down
2 changes: 1 addition & 1 deletion sources/Renderer/Vulkan/VKDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void VKDevice::CreateLogicalDevice(
createInfo.enabledExtensionCount = numExtensions;
createInfo.ppEnabledExtensionNames = extensions;

/* If must pass the feature flags either through the chain of pNext (Vulkan 1.1+), or only through pEnabledFeatures (Vulkan 1.0) */
/* Must pass the feature flags either through the chain of pNext (Vulkan 1.1+), or only through pEnabledFeatures (Vulkan 1.0) */
if (features->pNext != nullptr)
{
createInfo.pNext = features;
Expand Down
Loading

0 comments on commit b0a44d1

Please sign in to comment.