Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Calculate argument buffer allocation size taking variable counts into account. #2199

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
}

void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) {
if (_groupCountX == 0 || _groupCountY == 0 || _groupCountZ == 0)
return;

MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ);
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
id<MTLComputeCommandEncoder> mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
Expand Down
2 changes: 1 addition & 1 deletion MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl

// The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer),
// so we need to lock out other access to it while we are writing to it.
auto& mvkArgEnc = useDescSetArgBuff ? dsLayout->getMTLArgumentEncoder() : pipeline->getMTLArgumentEncoder(dsIdx, stage);
auto& mvkArgEnc = useDescSetArgBuff ? dsLayout->getMTLArgumentEncoder(descSet) : pipeline->getMTLArgumentEncoder(dsIdx, stage);
lock_guard<mutex> lock(mvkArgEnc.mtlArgumentEncodingLock);

id<MTLBuffer> mtlArgBuffer = nil;
Expand Down
7 changes: 4 additions & 3 deletions MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject {
* count provided to that descriptor set is returned. Otherwise returns the value
* defined in VkDescriptorSetLayoutBinding::descriptorCount.
*/
uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr) const;
uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr, uint32_t variableDescriptorCount = std::numeric_limits<uint32_t>::max()) const;

/** Returns the descriptor type of this layout. */
inline VkDescriptorType getDescriptorType() { return _info.descriptorType; }
Expand Down Expand Up @@ -170,11 +170,12 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject {
friend class MVKInlineUniformBlockDescriptor;

void initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage);
void addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args);
void addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args, uint32_t variableDescriptorCount = 0);
void addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
uint32_t argIndex,
MTLDataType dataType,
MTLArgumentAccess access);
MTLArgumentAccess access,
uint32_t variableDescriptorCount);
bool isUsingMetalArgumentBuffer();
void populateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& shaderConfig,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
Expand Down
43 changes: 24 additions & 19 deletions MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,17 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s

MVKVulkanAPIObject* MVKDescriptorSetLayoutBinding::getVulkanAPIObject() { return _layout; };

uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet) const {
uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet, uint32_t variableDescriptorCount) const {

if (_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
return 1;
}

if (descSet && hasVariableDescriptorCount()) {
return descSet->_variableDescriptorCount;
if (hasVariableDescriptorCount()) {
if (descSet)
return descSet->_variableDescriptorCount;
if (variableDescriptorCount != std::numeric_limits<uint32_t>::max())
return variableDescriptorCount;
}

return _info.descriptorCount;
Expand Down Expand Up @@ -419,50 +422,50 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s
bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); };

// Adds MTLArgumentDescriptors to the array, and updates resource indexes consumed.
void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args) {
void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args, uint32_t variableDescriptorCount) {
switch (getDescriptorType()) {

case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadOnly, variableDescriptorCount);
break;

case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite, variableDescriptorCount);
break;

case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly, variableDescriptorCount);
break;

case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite, variableDescriptorCount);
if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite, variableDescriptorCount);
}
break;

case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly, variableDescriptorCount);
break;

case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite, variableDescriptorCount);
if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite, variableDescriptorCount);
}
break;

case VK_DESCRIPTOR_TYPE_SAMPLER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly, variableDescriptorCount);
break;

case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly, variableDescriptorCount);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly, variableDescriptorCount);
break;

default:
Expand All @@ -473,8 +476,10 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s
void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
uint32_t argIndex,
MTLDataType dataType,
MTLArgumentAccess access) {
uint32_t descCnt = getDescriptorCount();
MTLArgumentAccess access,
uint32_t variableDescriptorCount) {
uint32_t descCnt = getDescriptorCount(nullptr, variableDescriptorCount);

if (descCnt == 0) { return; }

auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
Expand All @@ -496,10 +501,10 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s
// Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding.
MVKShaderResourceBinding mtlIdxs = _mtlResourceIndexOffsets + dslMTLRezIdxOffsets;

uint32_t descCnt = getDescriptorCount();
uint32_t descCnt = getDescriptorCount(nullptr, 0);
bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer();
for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
if ((_applyToStage[stage] || isUsingMtlArgBuff) && descCnt > 0) {
if ((_applyToStage[stage] || isUsingMtlArgBuff) && (descCnt > 0 || hasVariableDescriptorCount())) {
mvkPopulateShaderConversionConfig(shaderConfig,
mtlIdxs.stages[stage],
MVKShaderStage(stage),
Expand Down
12 changes: 10 additions & 2 deletions MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
bool isUsingMetalArgumentBuffer() { return isUsingMetalArgumentBuffers() && !isPushDescriptorLayout(); };

/** Returns the MTLArgumentEncoder for the descriptor set. */
MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; }
MVKMTLArgumentEncoder& getMTLArgumentEncoder(MVKDescriptorSet *dsSet);

/** Calculates the length of encoded argument buffer. */
size_t getEncodedArgumentBufferLength(uint32_t variableDescriptorCount = 0);

MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo);

Expand All @@ -135,7 +138,8 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
void initMTLArgumentEncoder();
void initMTLArgumentEncoder(MVKMTLArgumentEncoder &encoder, MVKDescriptorSet *dsSet = nullptr);
bool needsDedicatedArgumentEncoder(MVKDescriptorSet *dsSet);

MVKSmallVector<MVKDescriptorSetLayoutBinding> _bindings;
std::unordered_map<uint32_t, uint32_t> _bindingToIndex;
Expand Down Expand Up @@ -203,6 +207,9 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject {
/** Returns the number of descriptors in this descriptor set that use dynamic offsets. */
uint32_t getDynamicOffsetDescriptorCount() { return _dynamicOffsetDescriptorCount; }

/** Returns the MTLArgumentEncoder for the descriptor set. */
MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; }

MVKDescriptorSet(MVKDescriptorPool* pool);

protected:
Expand All @@ -223,6 +230,7 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject {
NSUInteger _metalArgumentBufferOffset;
uint32_t _dynamicOffsetDescriptorCount;
uint32_t _variableDescriptorCount;
MVKMTLArgumentEncoder _mtlArgumentEncoder;
};


Expand Down
50 changes: 41 additions & 9 deletions MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@
_descriptorCount += _bindings.back().getDescriptorCount();
}

initMTLArgumentEncoder();
initMTLArgumentEncoder(_mtlArgumentEncoder);
}

// Find and return an array of binding flags from the pNext chain of pCreateInfo,
Expand All @@ -262,16 +262,42 @@
return nullptr;
}

void MVKDescriptorSetLayout::initMTLArgumentEncoder() {
if (isUsingDescriptorSetMetalArgumentBuffers() && isUsingMetalArgumentBuffer()) {
@autoreleasepool {
NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args); }
_mtlArgumentEncoder.init(args.count ? [getMTLDevice() newArgumentEncoderWithArguments: args] : nil);
}
void MVKDescriptorSetLayout::initMTLArgumentEncoder(MVKMTLArgumentEncoder &encoder, MVKDescriptorSet *dsSet) {
if (isUsingDescriptorSetMetalArgumentBuffers() && isUsingMetalArgumentBuffer()) @autoreleasepool {
NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args, dslBind.getDescriptorCount(dsSet)); }
encoder.init(args.count ? [getMTLDevice() newArgumentEncoderWithArguments: args] : nil);
}
}

MVKMTLArgumentEncoder &MVKDescriptorSetLayout::getMTLArgumentEncoder(MVKDescriptorSet *dsSet) {
if (needsDedicatedArgumentEncoder(dsSet))
return dsSet->getMTLArgumentEncoder();

return _mtlArgumentEncoder;
}

bool MVKDescriptorSetLayout::needsDedicatedArgumentEncoder(MVKDescriptorSet *dsSet) {
return dsSet && _bindings.size() && _bindings.back().hasVariableDescriptorCount() && (dsSet->getDescriptorCount() != _descriptorCount);
}

size_t MVKDescriptorSetLayout::getEncodedArgumentBufferLength(uint32_t variableDescriptorCount) {
if (_bindings.size() > 0) {
auto binding = _bindings.back();
if (binding.hasVariableDescriptorCount() && variableDescriptorCount < binding.getDescriptorCount()) {
@autoreleasepool {
NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args, variableDescriptorCount); }
auto encoder = [getMTLDevice() newArgumentEncoderWithArguments: args];
auto size = [encoder encodedLength];
[encoder release];
return size;
}
}
}

return _mtlArgumentEncoder.mtlArgumentEncoderSize;
}

#pragma mark -
#pragma mark MVKDescriptorSet
Expand Down Expand Up @@ -373,6 +399,10 @@
_descriptors.push_back(mvkDesc);
}
}

if (layout->needsDedicatedArgumentEncoder(this))
layout->initMTLArgumentEncoder(_mtlArgumentEncoder, this);

return getConfigurationResult();
}

Expand All @@ -392,6 +422,8 @@
_descriptors.shrink_to_fit();
_metalArgumentBufferDirtyDescriptors.resize(0);

_mtlArgumentEncoder = MVKMTLArgumentEncoder();

clearConfigurationResult();
}

Expand Down Expand Up @@ -487,7 +519,7 @@
uint32_t variableDescriptorCount,
VkDescriptorSet* pVKDS) {
VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY;
NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize;
NSUInteger mtlArgBuffAllocSize = mvkDSL->getEncodedArgumentBufferLength(variableDescriptorCount);
NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize,
getDevice()->_pMetalFeatures->mtlBufferAlignment);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverter::convert(SPIRVToMSLConversionConfigur
for (auto& rb : shaderConfig.resourceBindings) {
auto& rbb = rb.resourceBinding;
pMSLCompiler->add_msl_resource_binding(rbb);
if (rbb.count == 0) pMSLCompiler->set_argument_buffer_device_address_space(rbb.desc_set, true);

if (rb.requiresConstExprSampler) {
pMSLCompiler->remap_constexpr_sampler_by_binding(rbb.desc_set, rbb.binding, rb.constExprSampler);
Expand Down
Loading