From 4f58e6db1f333e4b9ba0f3dba5ad5d462371f858 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 4 Jul 2023 10:58:43 -0400 Subject: [PATCH 1/7] Remove option to use Metal argument buffers per pipeline-stage. The pipeline option was structurally permitted and designed for, but was never implemented. With the availability of Metal 3, which provides significant additional bindless argument buffer support, this option is being removed, in favor of future code enhancements that build on Metal 3. - Base all argument buffer support around descriptor sets. - Rename internal functions that test for support for argument buffers. - Update code documentation accordingly. --- MoltenVK/MoltenVK/API/mvk_config.h | 2 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 4 +-- .../Commands/MVKCommandEncoderState.mm | 22 ++++----------- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 5 +++- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 10 +++---- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 19 ++++--------- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 22 +++++++-------- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 15 ---------- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 28 +++++++------------ Scripts/runcts | 2 +- 11 files changed, 45 insertions(+), 86 deletions(-) diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 26a0cbbca..93576324c 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -32,7 +32,7 @@ extern "C" { /** * This header is obsolete and deprecated, and is provided for legacy compatibility only. * - * To configure MoltenVK, use one of the following mechanisms, + * To configure MoltenVK, use one of the following mechanisms, * as documented in MoltenVK_Configuration_Parameters.md: * * - The standard Vulkan VK_EXT_layer_settings extension (layer name "MoltenVK"). 
diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 43a9d1f83..b0a954d47 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -352,8 +352,8 @@ typedef struct { uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */ VkBool32 textureBarriers; /**< If true, texture barriers are supported within Metal render passes. Deprecated. Will always be false on all platforms. */ VkBool32 tileBasedDeferredRendering; /**< If true, this device uses tile-based deferred rendering. */ - VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */ - VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */ + VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported on the platform. */ + VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffers can be used for descriptor sets. */ MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */ MVKCounterSamplingFlags counterSamplingPoints; /**< Identifies the points where pipeline GPU counter sampling may occur. */ VkBool32 programmableSamplePositions; /**< If true, programmable MSAA sample positions are supported. 
*/ diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 9f2318b25..92282a8e4 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -651,7 +651,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl _boundDescriptorSets[descSetIndex] = descSet; - if (descSet->isUsingMetalArgumentBuffers()) { + if (descSet->isUsingMetalArgumentBuffer()) { // If the descriptor set has changed, track new resource usage. if (dsChanged) { auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex]; @@ -674,9 +674,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // Encode the dirty descriptors to the Metal argument buffer, set the Metal command encoder // usage for each resource, and bind the Metal argument buffer to the command encoder. void MVKResourcesCommandEncoderState::encodeMetalArgumentBuffer(MVKShaderStage stage) { - if ( !_cmdEncoder->isUsingMetalArgumentBuffers() ) { return; } - - bool useDescSetArgBuff = _cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers(); + if ( !_cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers() ) { return; } MVKPipeline* pipeline = getPipeline(); uint32_t dsCnt = pipeline->getDescriptorSetCount(); @@ -688,20 +686,12 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer), // so we need to lock out other access to it while we are writing to it. - auto& mvkArgEnc = useDescSetArgBuff ? 
dsLayout->getMTLArgumentEncoder() : pipeline->getMTLArgumentEncoder(dsIdx, stage); + auto& mvkArgEnc = dsLayout->getMTLArgumentEncoder(); lock_guard lock(mvkArgEnc.mtlArgumentEncodingLock); - id mtlArgBuffer = nil; - NSUInteger metalArgBufferOffset = 0; + id mtlArgBuffer = descSet->getMetalArgumentBuffer(); + NSUInteger metalArgBufferOffset = descSet->getMetalArgumentBufferOffset(); id mtlArgEncoder = mvkArgEnc.getMTLArgumentEncoder(); - if (useDescSetArgBuff) { - mtlArgBuffer = descSet->getMetalArgumentBuffer(); - metalArgBufferOffset = descSet->getMetalArgumentBufferOffset(); - } else { - // TODO: Source a different arg buffer & offset for each pipeline-stage/desccriptors set - // Also need to only encode the descriptors that are referenced in the shader. - // MVKMTLArgumentEncoder could include an MVKBitArray to track that and have it checked below. - } if ( !(mtlArgEncoder && mtlArgBuffer) ) { continue; } @@ -760,7 +750,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // Mark the resource usage as needing an update for each Metal render encoder. void MVKResourcesCommandEncoderState::markDirty() { MVKCommandEncoderState::markDirty(); - if (_cmdEncoder->isUsingMetalArgumentBuffers()) { + if (_cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) { _metalUsageDirtyDescriptors[dsIdx].setAllBits(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index 97f51a076..7b0f002b3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -117,7 +117,7 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; } /** Returns true if this layout is using a Metal argument buffer. 
*/ - bool isUsingMetalArgumentBuffer() { return isUsingMetalArgumentBuffers() && !isPushDescriptorLayout(); }; + bool isUsingMetalArgumentBuffer() { return isUsingDescriptorSetMetalArgumentBuffers() && !isPushDescriptorLayout(); }; /** Returns the MTLArgumentEncoder for the descriptor set. */ MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; } @@ -203,6 +203,9 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { /** Returns the number of descriptors in this descriptor set that use dynamic offsets. */ uint32_t getDynamicOffsetDescriptorCount() { return _dynamicOffsetDescriptorCount; } + /** Returns true if this descriptor set is using a Metal argument buffer. */ + bool isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); }; + MVKDescriptorSet(MVKDescriptorPool* pool); protected: diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index 73a564516..e3b1039b1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -41,7 +41,7 @@ if (cmdEncoder) { cmdEncoder->bindDescriptorSet(pipelineBindPoint, descSetIndex, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex); } - if ( !isUsingMetalArgumentBuffers() ) { + if ( !isUsingMetalArgumentBuffer() ) { for (auto& dslBind : _bindings) { dslBind.bind(cmdEncoder, pipelineBindPoint, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex); } @@ -182,7 +182,7 @@ } // Mark if Metal argument buffers are in use, but this descriptor set layout is not using them. 
- if (isUsingMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) { + if (isUsingDescriptorSetMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) { shaderConfig.discreteDescriptorSets.push_back(descSetIndex); } } @@ -265,7 +265,7 @@ } void MVKDescriptorSetLayout::initMTLArgumentEncoder() { - if (isUsingDescriptorSetMetalArgumentBuffers() && isUsingMetalArgumentBuffer()) { + if (isUsingMetalArgumentBuffer()) { @autoreleasepool { NSMutableArray* args = [NSMutableArray arrayWithCapacity: _bindings.size()]; for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args); } @@ -503,7 +503,7 @@ // will fit in the slot that might already have been allocated for it in the Metal argument // buffer from a previous allocation that was returned. If this pool has been reset recently, // then the desc sets will not have had a Metal argument buffer allocation assigned yet. - if (isUsingDescriptorSetMetalArgumentBuffers() && mvkDSL->isUsingMetalArgumentBuffer()) { + if (mvkDSL->isUsingMetalArgumentBuffer()) { // If the offset has not been set (and it's not the first desc set except // on a reset pool), set the offset and update the next available offset value. @@ -819,7 +819,7 @@ metalArgBuffSize = maxMTLBuffSize; } _metalArgumentBuffer = [getMTLDevice() newBufferWithLength: metalArgBuffSize options: MTLResourceStorageModeShared]; // retained - _metalArgumentBuffer.label = @"Argument buffer"; + _metalArgumentBuffer.label = @"Descriptor pool argument buffer"; } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 4eab04b33..46f254ae2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -352,9 +352,9 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { /** Returns whether the MSL version is supported on this device. 
*/ bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; } - /** Returns whether this physical device supports Metal argument buffers. */ - bool supportsMetalArgumentBuffers() { - return _metalFeatures.argumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; + /** Returns whether this physical device supports using Metal argument buffers for descriptor sets. */ + bool supportsDescriptorSetMetalArgumentBuffers() { + return _metalFeatures.descriptorSetArgumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; }; /** Returns the MTLStorageMode that matches the Vulkan memory property flags. */ @@ -710,9 +710,6 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { #pragma mark Metal - /** Returns whether this device is using Metal argument buffers. */ - bool isUsingMetalArgumentBuffers() { return _isUsingMetalArgumentBuffers; }; - /** * Returns an autoreleased options object to be used when compiling MSL shaders. * The requestFastMath parameter is combined with the value of MVKConfiguration::fastMathEnabled @@ -872,7 +869,7 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { int _capturePipeFileDesc = -1; bool _isPerformanceTracking = false; bool _isCurrentlyAutoGPUCapturing = false; - bool _isUsingMetalArgumentBuffers = false; + bool _isUsingDescriptorSetMetalArgumentBuffers = false; }; @@ -909,14 +906,8 @@ class MVKDeviceTrackingMixin { /** Returns info about the pixel format supported by the physical device. */ MVKPixelFormats* getPixelFormats() { return &_device->_physicalDevice->_pixelFormats; } - /** Returns whether this device is using Metal argument buffers. */ - bool isUsingMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers; }; - /** Returns whether this device is using one Metal argument buffer for each descriptor set, on multiple pipeline and pipeline stages. 
*/ - bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; }; - - /** Returns whether this device is using one Metal argument buffer for each descriptor set-pipeline-stage combination. */ - bool isUsingPipelineStageMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && !getMetalFeatures().descriptorSetArgumentBuffers; }; + bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingDescriptorSetMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; }; /** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */ MVKExtensionList& getEnabledExtensions() { return _device->_enabledExtensions; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0a21c0b54..7137bd732 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -524,7 +524,7 @@ uint32_t uintMax = std::numeric_limits::max(); uint32_t maxSamplerCnt = getMaxSamplerCount(); - bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); + bool isTier2 = supportsDescriptorSetMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); // Create a SSOT for these Vulkan 1.1 properties, which can be queried via two mechanisms here. VkPhysicalDeviceVulkan11Properties supportedProps11; @@ -2397,8 +2397,6 @@ (mvkOSVersionIsAtLeast(11.0) || _properties.vendorID == kIntelVendorId)); #endif - // Currently, if we don't support descriptor set argument buffers, we can't support argument buffers. 
- _metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers; if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) { _metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport; @@ -3068,7 +3066,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // Next 4 bytes contains flags based on enabled Metal features that // might affect the contents of the pipeline cache (mostly MSL content). uint32_t mtlFeatures = 0; - mtlFeatures |= supportsMetalArgumentBuffers() << 0; + mtlFeatures |= supportsDescriptorSetMetalArgumentBuffers() << 0; *(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures); uuidComponentOffset += sizeof(mtlFeatures); } @@ -3301,7 +3299,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // objects that can be created within the app. When not using argument buffers, no such // limit is imposed. This has been verified with testing up to 1M MTLSamplerStates. uint32_t MVKPhysicalDevice::getMaxSamplerCount() { - if (supportsMetalArgumentBuffers()) { + if (supportsDescriptorSetMetalArgumentBuffers()) { return ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)] ? 
(uint32_t)_mtlDevice.maxArgumentBufferSamplerCount : 1024); } else { @@ -4676,7 +4674,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope if ( !_defaultMTLSamplerState ) { @autoreleasepool { MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease]; - mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers(); + mtlSampDesc.supportArgumentBuffers = _isUsingDescriptorSetMetalArgumentBuffers; _defaultMTLSamplerState = [_physicalDevice->_mtlDevice newSamplerStateWithDescriptor: mtlSampDesc]; // retained } } @@ -4873,12 +4871,12 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope #endif // After enableExtensions && enableFeatures - // Use Metal arg buffs if available, and either config wants them always, - // or config wants them with descriptor indexing and descriptor indexing has been enabled. - _isUsingMetalArgumentBuffers = (_physicalDevice->supportsMetalArgumentBuffers() && - (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS || - (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING && - (_enabledVulkan12FeaturesNoExt.descriptorIndexing || _enabledExtensions.vk_EXT_descriptor_indexing.enabled)))); + // Use Metal arg buffs if available, and config wants them either always, + // or with descriptor indexing and descriptor indexing has been enabled. 
+ _isUsingDescriptorSetMetalArgumentBuffers = (_physicalDevice->supportsDescriptorSetMetalArgumentBuffers() && + (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS || + (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING && + (_enabledVulkan12FeaturesNoExt.descriptorIndexing || _enabledExtensions.vk_EXT_descriptor_indexing.enabled)))); _commandResourceFactory = new MVKCommandResourceFactory(this); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index fa48391d9..6b52b5ce5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -2458,7 +2458,7 @@ static MSLSamplerYCbCrRange getSpvSamplerYcbcrRangeFromVkSamplerYcbcrRange(VkSam ? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, getDeviceProperties().limits.maxSamplerAnisotropy) : 1); mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates; - mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers(); + mtlSampDesc.supportArgumentBuffers = isUsingDescriptorSetMetalArgumentBuffers(); // If compareEnable is true, but dynamic samplers with depth compare are not available // on this device, this sampler must only be used as an immutable sampler, and will diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index d67576695..4de395e37 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -150,9 +150,6 @@ class MVKPipeline : public MVKVulkanAPIDeviceObject { /** Returns whether all internal Metal pipeline states are valid. */ bool hasValidMTLPipelineStates() { return _hasValidMTLPipelineStates; } - /** Returns the MTLArgumentEncoder for the descriptor set. 
*/ - virtual MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) = 0; - /** Returns the array of descriptor binding use for the descriptor set. */ virtual MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) = 0; @@ -214,10 +211,6 @@ typedef std::pair MVKZeroDivisorVertexBinding; typedef MVKSmallVector MVKPiplineStages; -struct MVKStagedMTLArgumentEncoders { - MVKMTLArgumentEncoder stages[4] = {}; -}; - struct MVKStagedDescriptorBindingUse { MVKBitArray stages[4] = {}; }; @@ -344,9 +337,6 @@ class MVKGraphicsPipeline : public MVKPipeline { /** Returns the collection of instance-rate vertex bindings whose divisor is zero, along with their strides. */ MVKArrayRef getZeroDivisorVertexBindings() { return _zeroDivisorVertexBindings.contents(); } - /** Returns the MTLArgumentEncoder for the descriptor set. */ - MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex].stages[stage]; } - /** Returns the array of descriptor binding use for the descriptor set. */ MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex].stages[stage]; } @@ -410,7 +400,6 @@ class MVKGraphicsPipeline : public MVKPipeline { MVKSmallVector _sampleLocations; MVKSmallVector _translatedVertexBindings; MVKSmallVector _zeroDivisorVertexBindings; - MVKSmallVector _mtlArgumentEncoders; MVKSmallVector _descriptorBindingUse; MVKSmallVector _stagesUsingPhysicalStorageBufferAddressesCapability; std::unordered_map> _multiviewMTLPipelineStates; @@ -473,9 +462,6 @@ class MVKComputePipeline : public MVKPipeline { /** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */ bool allowsDispatchBase() { return _allowsDispatchBase; } - /** Returns the MTLArgumentEncoder for the descriptor set. 
*/ - MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex]; } - /** Returns the array of descriptor binding use for the descriptor set. */ MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex]; } @@ -495,7 +481,6 @@ class MVKComputePipeline : public MVKPipeline { uint32_t getImplicitBufferIndex(uint32_t bufferIndexOffset); id _mtlPipelineState; - MVKSmallVector _mtlArgumentEncoders; MVKSmallVector _descriptorBindingUse; MTLSize _mtlThreadgroupSize; bool _needsSwizzleBuffer = false; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 82f06d731..ef592db16 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -139,7 +139,7 @@ // If we are using Metal argument buffers, consume a fixed number // of buffer indexes for the Metal argument buffers themselves. - if (isUsingMetalArgumentBuffers()) { + if (isUsingDescriptorSetMetalArgumentBuffers()) { _mtlResourceCounts.addArgumentBuffers(kMVKMaxDescriptorSetCount); } @@ -194,22 +194,16 @@ } } -// For each descriptor set, populate the descriptor bindings used by the shader for this stage, -// and if Metal argument encoders must be dedicated to a pipeline stage, create the encoder here. +// For each descriptor set, populate the descriptor bindings used by the shader for this stage. 
template void MVKPipeline::addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc, const CreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, MVKShaderStage stage) { - if ( !isUsingMetalArgumentBuffers() ) { return; } - - bool needMTLArgEnc = isUsingPipelineStageMetalArgumentBuffers(); - auto mtlFunc = mvkMTLFunc.getMTLFunction(); - for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) { - auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx); - bool descSetIsUsed = dsLayout->populateBindingUse(getDescriptorBindingUse(dsIdx, stage), shaderConfig, stage, dsIdx); - if (descSetIsUsed && needMTLArgEnc) { - getMTLArgumentEncoder(dsIdx, stage).init([mtlFunc newArgumentEncoderWithBufferIndex: dsIdx]); + if (isUsingDescriptorSetMetalArgumentBuffers()) { + for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) { + auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx); + dsLayout->populateBindingUse(getDescriptorBindingUse(dsIdx, stage), shaderConfig, stage, dsIdx); } } } @@ -696,8 +690,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { pipelineStart = mvkGetTimestamp(); } - if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } - if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); } + if (isUsingDescriptorSetMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } const char* dumpDir = getMVKConfig().shaderDumpDir; if (dumpDir && *dumpDir) { @@ -1744,7 +1737,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this); shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers; - bool useMetalArgBuff = isUsingMetalArgumentBuffers(); + bool 
useMetalArgBuff = isUsingDescriptorSetMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; @@ -2089,8 +2082,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT); - if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } - if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); } + if (isUsingDescriptorSetMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } const VkPipelineCreationFeedbackCreateInfo* pFeedbackInfo = nullptr; for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { @@ -2189,7 +2181,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D; shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 
0 : mtlFeats.maxSubgroupSize; - bool useMetalArgBuff = isUsingMetalArgumentBuffers(); + bool useMetalArgBuff = isUsingDescriptorSetMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; diff --git a/Scripts/runcts b/Scripts/runcts index 6b94317de..c27d2f7c8 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -123,7 +123,7 @@ export MVK_CONFIG_DEBUG=0 export MVK_CONFIG_RESUME_LOST_DEVICE=1 export MVK_CONFIG_FAST_MATH_ENABLED=1 export MVK_CONFIG_FORCE_LOW_POWER_GPU=0 -export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_indexing enabled) +export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(1 = Always, 2 = VK_EXT_descriptor_indexing enabled) export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 From 370e7c14e770ddca71bc99eb34921aa937c76334 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 4 Jul 2023 13:57:58 -0400 Subject: [PATCH 2/7] Support Metal 3 argument buffers. - Add MVKPhysicalDeviceMetalFeatures::needsArgumentBufferEncoders to indicate if Metal argument buffer encoders are needed to populate argument buffer content. - Update MVK_PRIVATE_API_VERSION to 42. - Add MVKArgumentBufferEncoder to handle populating Metal argument buffers with or without Metal argument encoders, and pass this to descriptors. - Indicate support for argument buffers on iOS, when Metal 3 is available. - MVKConfiguration::useMetalArgumentBuffers set to MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS by default, which means Metal argument buffers are now used by default. - runcts script enables argument buffers by default. 
--- MoltenVK/MoltenVK/API/mvk_private_api.h | 3 +- .../Commands/MVKCommandEncoderState.mm | 31 +++---- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h | 34 ++++++-- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 82 ++++++++++++++----- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 33 ++++---- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 63 ++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 9 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 25 +++--- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 8 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 20 ++--- MoltenVK/MoltenVK/Utility/MVKEnvironment.h | 4 +- Scripts/runcts | 2 +- 12 files changed, 194 insertions(+), 120 deletions(-) diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index b0a954d47..8a92e2453 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 41 +#define MVK_PRIVATE_API_VERSION 42 #pragma mark - @@ -364,6 +364,7 @@ typedef struct { VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */ VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */ + VkBool32 needsArgumentBufferEncoders; /**< If true, Metal argument buffer encoders are needed to populate argument buffer content. 
*/ } MVKPhysicalDeviceMetalFeatures; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 92282a8e4..762749662 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -680,26 +680,18 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl uint32_t dsCnt = pipeline->getDescriptorSetCount(); for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { auto* descSet = _boundDescriptorSets[dsIdx]; - if ( !descSet ) { continue; } - - auto* dsLayout = descSet->getLayout(); + if ( !(descSet && descSet->isUsingMetalArgumentBuffer()) ) { continue; } // The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer), // so we need to lock out other access to it while we are writing to it. - auto& mvkArgEnc = dsLayout->getMTLArgumentEncoder(); - lock_guard lock(mvkArgEnc.mtlArgumentEncodingLock); - - id mtlArgBuffer = descSet->getMetalArgumentBuffer(); - NSUInteger metalArgBufferOffset = descSet->getMetalArgumentBufferOffset(); - id mtlArgEncoder = mvkArgEnc.getMTLArgumentEncoder(); - - if ( !(mtlArgEncoder && mtlArgBuffer) ) { continue; } + auto* dsLayout = descSet->getLayout(); + auto& mvkArgEnc = dsLayout->getMVKMTLArgumentEncoder(); + mvkArgEnc.lock(); + MVKArgumentBufferEncoder mvkArgBuffEnc(mvkArgEnc); auto& argBuffDirtyDescs = descSet->getMetalArgumentBufferDirtyDescriptors(); auto& resourceUsageDirtyDescs = _metalUsageDirtyDescriptors[dsIdx]; auto& shaderBindingUsage = pipeline->getDescriptorBindingUse(dsIdx, stage); - - bool mtlArgEncAttached = false; bool shouldBindArgBuffToStage = false; uint32_t dslBindCnt = dsLayout->getBindingCount(); for (uint32_t dslBindIdx = 0; dslBindIdx < dslBindCnt; dslBindIdx++) { @@ -714,21 +706,20 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl if (argBuffDirty || resourceUsageDirty) { // Don't attach the arg 
buffer to the arg encoder unless something actually needs // to be written to it. We often might only be updating command encoder resource usage. - if (!mtlArgEncAttached && argBuffDirty) { - [mtlArgEncoder setArgumentBuffer: mtlArgBuffer offset: metalArgBufferOffset]; - mtlArgEncAttached = true; + if (argBuffDirty) { + mvkArgBuffEnc.setArgumentBuffer(descSet->getMetalArgumentBuffer(), + descSet->getMetalArgumentBufferOffset()); } auto* mvkDesc = descSet->getDescriptorAt(descIdx); - mvkDesc->encodeToMetalArgumentBuffer(this, mtlArgEncoder, + mvkDesc->encodeToMetalArgumentBuffer(this, mvkArgBuffEnc, dsIdx, dslBind, elemIdx, stage, argBuffDirty, true); } } } } - - // If the arg buffer was attached to the arg encoder, detach it now. - if (mtlArgEncAttached) { [mtlArgEncoder setArgumentBuffer: nil offset: 0]; } + mvkArgBuffEnc.setArgumentBuffer(nil, 0); + mvkArgEnc.unlock(); // If it is needed, bind the Metal argument buffer itself to the command encoder, if (shouldBindArgBuffToStage) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h index 89536fc5e..91e3ea7c9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h @@ -26,6 +26,7 @@ class MVKDescriptorSet; class MVKDescriptorSetLayout; class MVKCommandEncoder; class MVKResourcesCommandEncoderState; +struct MVKMTLArgumentEncoder; #pragma mark MVKShaderStageResourceBinding @@ -195,6 +196,23 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { #pragma mark - #pragma mark MVKDescriptor +/** + * Helper object to handle the placement of resources into a Metal Argument Buffer + * in a consistent manner, whether or not a MTLArgumentEncoder is required. 
+ */ +typedef struct MVKArgumentBufferEncoder { + void setArgumentBuffer(id mtlArgBuff, NSUInteger mtlArgBuffOfst); + void setBuffer(id mtlBuff, NSUInteger offset, uint32_t index); + void setTexture(id mtlTex, uint32_t index); + void setSamplerState(id mtlSamp, uint32_t index); + MVKArgumentBufferEncoder(MVKMTLArgumentEncoder& mvkMTLArgEnc); +protected: + void* getArgumentPointer(uint32_t index) const; + id _mtlArgumentEncoder = nil; + id _mtlArgumentBuffer = nil; + NSUInteger _mtlArgumentBufferOffset = 0; +} MVKArgumentBufferEncoder; + /** Represents a Vulkan descriptor. */ class MVKDescriptor : public MVKBaseObject { @@ -220,7 +238,7 @@ class MVKDescriptor : public MVKBaseObject { /** Encodes this descriptor to the Metal argument buffer. */ virtual void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -286,7 +304,7 @@ class MVKBufferDescriptor : public MVKDescriptor { uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -376,7 +394,7 @@ class MVKInlineUniformBlockDescriptor : public MVKDescriptor { uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -426,7 +444,7 @@ class MVKImageDescriptor : public MVKDescriptor { uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* 
mvkDSLBind, uint32_t elementIndex, @@ -507,7 +525,7 @@ class MVKSamplerDescriptorMixin { uint32_t& dynamicOffsetIndex); void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -555,7 +573,7 @@ class MVKSamplerDescriptor : public MVKDescriptor, public MVKSamplerDescriptorMi uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -603,7 +621,7 @@ class MVKCombinedImageSamplerDescriptor : public MVKImageDescriptor, public MVKS uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -649,7 +667,7 @@ class MVKTexelBufferDescriptor : public MVKDescriptor { uint32_t& dynamicOffsetIndex) override; void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index 255f9bb3a..6751c3277 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -703,6 +703,46 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s #pragma mark - #pragma mark MVKDescriptor +void MVKArgumentBufferEncoder::setArgumentBuffer(id mtlArgBuff, NSUInteger mtlArgBuffOfst) { + if (_mtlArgumentEncoder && mtlArgBuff != _mtlArgumentBuffer) { + 
[_mtlArgumentEncoder setArgumentBuffer: mtlArgBuff offset: mtlArgBuffOfst]; + } + _mtlArgumentBuffer = mtlArgBuff; + _mtlArgumentBufferOffset = mtlArgBuffOfst; +} + +void MVKArgumentBufferEncoder::setBuffer(id mtlBuff, NSUInteger offset, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setBuffer: mtlBuff offset: offset atIndex: index]; + } else { + *(uint64_t*)getArgumentPointer(index) = mtlBuff.gpuAddress + offset; + } +} + +void MVKArgumentBufferEncoder::setTexture(id mtlTex, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setTexture: mtlTex atIndex: index]; + } else { + *(MTLResourceID*)getArgumentPointer(index) = mtlTex.gpuResourceID; + } +} + +void MVKArgumentBufferEncoder::setSamplerState(id mtlSamp, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setSamplerState: mtlSamp atIndex: index]; + } else { + *(MTLResourceID*)getArgumentPointer(index) = mtlSamp.gpuResourceID; + } +} + +// Returns the address of the slot at the index within the Metal argument buffer. +// This is based on the Metal 3 design that all arg buffer slots are 64 bits. 
+void* MVKArgumentBufferEncoder::getArgumentPointer(uint32_t index) const { + return (void*)((uintptr_t)_mtlArgumentBuffer.contents + _mtlArgumentBufferOffset + (index * sizeof(uint64_t))); +} + +MVKArgumentBufferEncoder::MVKArgumentBufferEncoder(MVKMTLArgumentEncoder& mvkMTLArgEnc) : _mtlArgumentEncoder(mvkMTLArgEnc._mtlArgumentEncoder) {} + MTLResourceUsage MVKDescriptor::getMTLResourceUsage() { MTLResourceUsage mtlUsage = MTLResourceUsageRead; switch (getDescriptorType()) { @@ -757,7 +797,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } void MVKBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -766,9 +806,9 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s bool encodeUsage) { if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex; - [mtlArgEncoder setBuffer: _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil - offset: _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0 - atIndex: argIdx]; + mvkArgBuffEnc.setBuffer(_mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil, + _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0, + argIdx); } if (encodeUsage) { id mtlBuffer = _mvkBuffer ? 
_mvkBuffer->getMTLBuffer() : nil; @@ -842,7 +882,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } void MVKInlineUniformBlockDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -851,9 +891,9 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s bool encodeUsage) { if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex; - [mtlArgEncoder setBuffer: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil - offset: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0 - atIndex: argIdx]; + mvkArgBuffEnc.setBuffer(_mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil, + _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0, + argIdx); } if (encodeUsage) { id mtlBuffer = _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil; @@ -946,7 +986,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -962,7 +1002,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s id mtlTexture = _mvkImageView ? 
_mvkImageView->getMTLTexture(planeIndex) : nil; if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx; - [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx]; + mvkArgBuffEnc.setTexture(mtlTexture, argIdx); } if (encodeUsage) { rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); @@ -973,7 +1013,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (mtlBuff) { if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + planeDescIdx; - [mtlArgEncoder setBuffer: mtlBuff offset: mtlTex.bufferOffset atIndex: argIdx]; + mvkArgBuffEnc.setBuffer(mtlBuff, mtlTex.bufferOffset, argIdx); } if (encodeUsage) { rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); @@ -1049,7 +1089,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s // Metal validation requires each sampler in an array of samplers to be populated, // even if not used, so populate a default if one hasn't been set. void MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -1062,7 +1102,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s ? 
mvkSamp->getMTLSamplerState() : mvkDSLBind->getDevice()->getDefaultMTLSamplerState()); uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().samplerIndex + elementIndex; - [mtlArgEncoder setSamplerState: mtlSamp atIndex: argIdx]; + mvkArgBuffEnc.setSamplerState(mtlSamp, argIdx); } } @@ -1119,14 +1159,14 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } void MVKSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, MVKShaderStage stage, bool encodeToArgBuffer, bool encodeUsage) { - MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); + MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); } void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1170,15 +1210,15 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } void MVKCombinedImageSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, MVKShaderStage stage, bool encodeToArgBuffer, bool encodeUsage) { - MVKImageDescriptor::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer, encodeUsage); - MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); + MVKImageDescriptor::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer, 
encodeUsage); + MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); } void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1243,7 +1283,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - id mtlArgEncoder, + MVKArgumentBufferEncoder& mvkArgBuffEnc, uint32_t descSetIndex, MVKDescriptorSetLayoutBinding* mvkDSLBind, uint32_t elementIndex, @@ -1254,7 +1294,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s id mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil; if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + elementIndex; - [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx]; + mvkArgBuffEnc.setTexture(mtlTexture, argIdx); } if (encodeUsage) { rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); @@ -1265,7 +1305,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (mtlBuff) { if (encodeToArgBuffer) { uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex; - [mtlArgEncoder setBuffer: mtlBuff offset: mtlTexture.bufferOffset atIndex: argIdx]; + mvkArgBuffEnc.setBuffer(mtlBuff, mtlTexture.bufferOffset, argIdx); } if (encodeUsage) { rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index 7b0f002b3..a26e5531a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -29,25 +29,27 @@ class MVKDescriptorPool; class MVKPipelineLayout; class 
MVKCommandEncoder; class MVKResourcesCommandEncoderState; +struct MVKArgumentBufferEncoder; #pragma mark - #pragma mark MVKDescriptorSetLayout /** - * Holds and manages the lifecycle of a MTLArgumentEncoder. The encoder can - * only be set once, and copying this object results in an uninitialized - * empty object, since mutex and MTLArgumentEncoder can/should not be copied. + * Holds and manages the lifecycle of a MTLArgumentEncoder, including locking access to the encoder, + * since it can only be used to encode to one argument buffer at a time. The internal MTLArgumentEncoder + * will be nil if the platform supports populating Metal argument buffers without a MTLArgumentEncoder. + * + * Because of the internal mutex, this encoder can only be initialized once, and because + * mutexes cannot be copied, copying this object results in an uninitialized empty object. */ struct MVKMTLArgumentEncoder { - std::mutex mtlArgumentEncodingLock; - NSUInteger mtlArgumentEncoderSize = 0; - - id getMTLArgumentEncoder() { return _mtlArgumentEncoder; } + NSUInteger getEncodedLength() { return _mtlArgumentEncoder.encodedLength; } + void lock() { if (_mtlArgumentEncoder) { _mtlArgumentEncodingLock.lock(); } } + void unlock() { if (_mtlArgumentEncoder) { _mtlArgumentEncodingLock.unlock(); } } void init(id mtlArgEnc) { - if (_mtlArgumentEncoder) { return; } - _mtlArgumentEncoder = mtlArgEnc; // takes ownership - mtlArgumentEncoderSize = mtlArgEnc.encodedLength; + assert( !_mtlArgumentEncoder ); + _mtlArgumentEncoder = mtlArgEnc; } MVKMTLArgumentEncoder(const MVKMTLArgumentEncoder& other) {} @@ -55,8 +57,11 @@ struct MVKMTLArgumentEncoder { MVKMTLArgumentEncoder() {} ~MVKMTLArgumentEncoder() { [_mtlArgumentEncoder release]; } -private: +protected: + friend MVKArgumentBufferEncoder; + id _mtlArgumentEncoder = nil; + std::mutex _mtlArgumentEncodingLock; }; /** Represents a Vulkan descriptor set layout. 
*/ @@ -120,7 +125,7 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { bool isUsingMetalArgumentBuffer() { return isUsingDescriptorSetMetalArgumentBuffers() && !isPushDescriptorLayout(); }; /** Returns the MTLArgumentEncoder for the descriptor set. */ - MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; } + MVKMTLArgumentEncoder& getMVKMTLArgumentEncoder() { return _mvkMTLArgumentEncoder; } MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo); @@ -139,7 +144,7 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { MVKSmallVector _bindings; std::unordered_map _bindingToIndex; - MVKMTLArgumentEncoder _mtlArgumentEncoder; + MVKMTLArgumentEncoder _mvkMTLArgumentEncoder; MVKShaderResourceBinding _mtlResourceCounts; uint32_t _descriptorCount; bool _isPushDescriptorLayout; @@ -293,7 +298,7 @@ class MVKDescriptorPool : public MVKVulkanAPIDeviceObject { VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc); void freeDescriptor(MVKDescriptor* mvkDesc); void initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo); - NSUInteger getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount, NSUInteger textureCount, NSUInteger samplerCount); + NSUInteger getMetalArgumentBufferEncodedResourceStorageSize(NSUInteger bufferCount, NSUInteger textureCount, NSUInteger samplerCount); MTLArgumentDescriptor* getMTLArgumentDescriptor(MTLDataType resourceType, NSUInteger argIndex, NSUInteger count); size_t getPoolSize(const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorType descriptorType); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index e3b1039b1..c6bb5eaf4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -265,11 +265,15 @@ } void MVKDescriptorSetLayout::initMTLArgumentEncoder() { - if 
(isUsingMetalArgumentBuffer()) { + if (isUsingMetalArgumentBuffer() && needsMetalArgumentBufferEncoders()) { @autoreleasepool { NSMutableArray* args = [NSMutableArray arrayWithCapacity: _bindings.size()]; - for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args); } - _mtlArgumentEncoder.init(args.count ? [getMTLDevice() newArgumentEncoderWithArguments: args] : nil); + for (auto& dslBind : _bindings) { + dslBind.addMTLArgumentDescriptors(args); + } + if (args.count) { + _mvkMTLArgumentEncoder.init([getMTLDevice() newArgumentEncoderWithArguments: args]); + } } } } @@ -483,13 +487,13 @@ return nullptr; } -// Retieves the first available descriptor set from the pool, and configures it. +// Retrieves the first available descriptor set from the pool, and configures it. // If none are available, returns an error. VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL, uint32_t variableDescriptorCount, VkDescriptorSet* pVKDS) { VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY; - NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize; + NSUInteger mtlArgBuffAllocSize = mvkDSL->getMVKMTLArgumentEncoder().getEncodedLength(); NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize, getMetalFeatures().mtlBufferAlignment); @@ -796,22 +800,30 @@ } // Each descriptor set uses a separate Metal argument buffer, but all of these descriptor set - // Metal argument buffers share a single MTLBuffer. This single MTLBuffer needs to be large enough - // to hold all of the Metal resources for the descriptors. In addition, depending on the platform, - // a Metal argument buffer may have a fixed overhead storage, in addition to the storage required - // to hold the resources. 
This overhead per descriptor set is conservatively calculated by measuring - // the size of a Metal argument buffer containing one of each type of resource (S1), and the size - // of a Metal argument buffer containing two of each type of resource (S2), and then calculating - // the fixed overhead per argument buffer as (2 * S1 - S2). To this is added the overhead due to - // the alignment of each descriptor set Metal argument buffer offset. - NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) - - getMetalArgumentBufferResourceStorageSize(2, 2, 2) + - mtlFeats.mtlBufferAlignment); - - // Measure the size of an argument buffer that would hold all of the resources - // managed in this pool, then add any overhead for all the descriptor sets. - NSUInteger metalArgBuffSize = getMetalArgumentBufferResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt); - metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set + // Metal argument buffers share a single MTLBuffer. This single MTLBuffer needs to be large + // enough to hold all of the encoded resources for the descriptors. + NSUInteger metalArgBuffSize = 0; + if (needsMetalArgumentBufferEncoders()) { + // If argument buffer encoders are required, depending on the platform, a Metal argument + // buffer may have a fixed overhead storage, in addition to the storage required to hold + // the resources. This overhead per descriptor set is conservatively calculated by measuring + // the size of a Metal argument buffer containing one of each type of resource (S1), and + // the size of a Metal argument buffer containing two of each type of resource (S2), and + // then calculating the fixed overhead per argument buffer as (2 * S1 - S2). To this is + // added the overhead due to the alignment of each descriptor set Metal argument buffer offset. 
+ NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferEncodedResourceStorageSize(1, 1, 1) - + getMetalArgumentBufferEncodedResourceStorageSize(2, 2, 2) + + mtlFeats.mtlBufferAlignment); + + // Measure the size of an argument buffer that would hold all of the encoded resources + // managed in this pool, then add any overhead for all the descriptor sets. + metalArgBuffSize = getMetalArgumentBufferEncodedResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt); + metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set + } else { + // For Metal 3, encoders are not required, and each arg buffer entry fits into 64 bits. + metalArgBuffSize = (mtlBuffCnt + mtlTexCnt + mtlSampCnt) * sizeof(uint64_t); + } + if (metalArgBuffSize) { NSUInteger maxMTLBuffSize = mtlFeats.maxMTLBufferSize; if (metalArgBuffSize > maxMTLBuffSize) { @@ -824,11 +836,12 @@ } } -// Returns the size of a Metal argument buffer containing the number of various types. +// Returns the size of a Metal argument buffer containing the number of various types +// of encoded resources. This is only required if argument buffers are required. // Make sure any call to this function is wrapped in @autoreleasepool. -NSUInteger MVKDescriptorPool::getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount, - NSUInteger textureCount, - NSUInteger samplerCount) { +NSUInteger MVKDescriptorPool::getMetalArgumentBufferEncodedResourceStorageSize(NSUInteger bufferCount, + NSUInteger textureCount, + NSUInteger samplerCount) { NSMutableArray* args = [NSMutableArray arrayWithCapacity: 3]; NSUInteger argIdx = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 46f254ae2..74f387cf7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -903,12 +903,15 @@ class MVKDeviceTrackingMixin { /** Returns whether the GPU is Apple Silicon. 
*/ bool isAppleGPU() { return _device->_physicalDevice->_isAppleGPU; } - /** Returns info about the pixel format supported by the physical device. */ - MVKPixelFormats* getPixelFormats() { return &_device->_physicalDevice->_pixelFormats; } - /** Returns whether this device is using one Metal argument buffer for each descriptor set, on multiple pipeline and pipeline stages. */ bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingDescriptorSetMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; }; + /** Returns whether this device needs Metal argument buffer encoders to populate argument buffer content. */ + bool needsMetalArgumentBufferEncoders() { return _device->_physicalDevice->_metalFeatures.needsArgumentBufferEncoders; }; + + /** Returns info about the pixel format supported by the physical device. */ + MVKPixelFormats* getPixelFormats() { return &_device->_physicalDevice->_pixelFormats; } + /** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */ MVKExtensionList& getEnabledExtensions() { return _device->_enabledExtensions; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 7137bd732..92adaf7c2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2388,22 +2388,25 @@ _metalFeatures.mtlBufferAlignment = 256; // Even on Apple Silicon #endif - // Currently, Metal argument buffer support is in beta stage, and is only supported - // on macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU. - // Metal argument buffers support is not available on iOS. Development to support iOS - // and a wider combination of GPU's on older macOS versions is under way. 
-#if MVK_MACOS - _metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers && - (mvkOSVersionIsAtLeast(11.0) || - _properties.vendorID == kIntelVendorId)); -#endif - + // Argument buffers if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) { _metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport; } else { _metalFeatures.argumentBuffersTier = MTLArgumentBuffersTier1; } + // Metal argument buffer support for descriptor sets is supported on macOS 11.0 or later, + // or on older versions of macOS using an Intel GPU, or on iOS & tvOS 16.0 or later (Metal 3). + _metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers && + (mvkOSVersionIsAtLeast(11.0, 16.0, 1.0) || + _properties.vendorID == kIntelVendorId)); + + // Argument encoders are not needed if Metal 3 plus Tier 2 argument buffers. + _metalFeatures.needsArgumentBufferEncoders = (_metalFeatures.argumentBuffers && + !(mvkOSVersionIsAtLeast(13.0, 16.0, 1.0) && + supportsMTLGPUFamily(Metal3) && + _metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2)); + #define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \ if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \ [_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \ @@ -4872,7 +4875,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // After enableExtensions && enableFeatures // Use Metal arg buffs if available, and config wants them either always, - // or with descriptor indexing and descriptor indexing has been enabled. + // or with descriptor indexing, and descriptor indexing has been enabled. 
_isUsingDescriptorSetMetalArgumentBuffers = (_physicalDevice->supportsDescriptorSetMetalArgumentBuffers() && (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS || (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING && diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 4de395e37..d20b7b8de 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -174,10 +174,10 @@ class MVKPipeline : public MVKVulkanAPIDeviceObject { protected: void propagateDebugName() override {} - template void addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc, - const CreateInfo* pCreateInfo, - SPIRVToMSLConversionConfiguration& shaderConfig, - MVKShaderStage stage); + template void populateDescriptorSetBindingUse(MVKMTLFunction& mvkMTLFunc, + const CreateInfo* pCreateInfo, + SPIRVToMSLConversionConfiguration& shaderConfig, + MVKShaderStage stage); MVKPipelineCache* _pipelineCache; MVKShaderImplicitRezBinding _descriptorBufferCounts; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index ef592db16..a4cc91e00 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -196,10 +196,10 @@ // For each descriptor set, populate the descriptor bindings used by the shader for this stage. 
template -void MVKPipeline::addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc, - const CreateInfo* pCreateInfo, - SPIRVToMSLConversionConfiguration& shaderConfig, - MVKShaderStage stage) { +void MVKPipeline::populateDescriptorSetBindingUse(MVKMTLFunction& mvkMTLFunc, + const CreateInfo* pCreateInfo, + SPIRVToMSLConversionConfiguration& shaderConfig, + MVKShaderStage stage) { if (isUsingDescriptorSetMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) { auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx); @@ -1105,7 +1105,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _needsVertexOutputBuffer = funcRslts.needsOutputBuffer; markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageVertex); - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); if (funcRslts.isRasterizationDisabled) { pFragmentSS = nullptr; @@ -1179,7 +1179,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageVertex); } - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); // If we need the swizzle buffer and there's no place to put it, we're in serious trouble. 
if (!verifyImplicitBuffer(_needsVertexSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageVertex, "swizzle")) { @@ -1240,7 +1240,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _needsTessCtlInputBuffer = funcRslts.needsInputThreadgroupMem; markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageTessCtl); - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageTessCtl); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageTessCtl); if (!verifyImplicitBuffer(_needsTessCtlSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageTessCtl, "swizzle")) { return false; @@ -1300,7 +1300,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _needsTessEvalDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageTessEval); - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageTessEval); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageTessEval); if (funcRslts.isRasterizationDisabled) { pFragmentSS = nullptr; @@ -1369,7 +1369,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer; markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageFragment); - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageFragment); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageFragment); if (!verifyImplicitBuffer(_needsFragmentSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageFragment, "swizzle")) { return false; @@ -2230,7 +2230,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer; _usesPhysicalStorageBufferAddressesCapability = 
funcRslts.usesPhysicalStorageBufferAddressesCapability; - addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageCompute); + populateDescriptorSetBindingUse(func, pCreateInfo, shaderConfig, kMVKShaderStageCompute); return func; } diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index b0a8eb42c..2ae9742f5 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -323,9 +323,9 @@ void mvkSetConfig(MVKConfiguration& dstMVKConfig, const MVKConfiguration& srcMVK # define MVK_CONFIG_RESUME_LOST_DEVICE 0 #endif -/** Support Metal argument buffers. Disabled by default. */ +/** Support Metal argument buffers. Enabled by default. */ #ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS -# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER +# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS #endif /** Compress MSL shader source code in a pipeline cache. Defaults to no compression. */ diff --git a/Scripts/runcts b/Scripts/runcts index c27d2f7c8..982ba2fbf 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -123,7 +123,7 @@ export MVK_CONFIG_DEBUG=0 export MVK_CONFIG_RESUME_LOST_DEVICE=1 export MVK_CONFIG_FAST_MATH_ENABLED=1 export MVK_CONFIG_FORCE_LOW_POWER_GPU=0 -export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(1 = Always, 2 = VK_EXT_descriptor_indexing enabled) +export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 #(1 = Always, 2 = VK_EXT_descriptor_indexing enabled) export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 From 4a9f7e559d2c776d2182cb30b475ec738214b470 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Thu, 13 Jul 2023 14:52:10 -0400 Subject: [PATCH 3/7] Update descriptor set argument buffer directly from vkUpdateDescriptorSets(). 
- Combine MVKMTLArgumentEncoder and MVKArgumentBufferEncoder into MVKMetalArgumentBuffer, which holds an arg buffer and its encoder, removing need to lock the encoder. - MVKDescriptorSet tracks argument buffer, and calls to vkUpdateDescriptorSets() directly insert resources into the argument buffer. - Add MVKDescriptor encodeResourceUsage() to encode indirect resource usage when using argument buffers. --- .../Commands/MVKCommandEncoderState.mm | 35 +- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h | 175 +++----- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 408 ++++++++---------- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 78 ++-- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 130 ++++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 1 + 6 files changed, 384 insertions(+), 443 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 762749662..eff18582c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -671,8 +671,8 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl } } -// Encode the dirty descriptors to the Metal argument buffer, set the Metal command encoder -// usage for each resource, and bind the Metal argument buffer to the command encoder. +// Encode the Metal command encoder usage for each resource, +// and bind the Metal argument buffer to the command encoder. 
void MVKResourcesCommandEncoderState::encodeMetalArgumentBuffer(MVKShaderStage stage) { if ( !_cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers() ) { return; } @@ -682,17 +682,11 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl auto* descSet = _boundDescriptorSets[dsIdx]; if ( !(descSet && descSet->isUsingMetalArgumentBuffer()) ) { continue; } - // The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer), - // so we need to lock out other access to it while we are writing to it. auto* dsLayout = descSet->getLayout(); - auto& mvkArgEnc = dsLayout->getMVKMTLArgumentEncoder(); - mvkArgEnc.lock(); - MVKArgumentBufferEncoder mvkArgBuffEnc(mvkArgEnc); - - auto& argBuffDirtyDescs = descSet->getMetalArgumentBufferDirtyDescriptors(); auto& resourceUsageDirtyDescs = _metalUsageDirtyDescriptors[dsIdx]; auto& shaderBindingUsage = pipeline->getDescriptorBindingUse(dsIdx, stage); bool shouldBindArgBuffToStage = false; + uint32_t dslBindCnt = dsLayout->getBindingCount(); for (uint32_t dslBindIdx = 0; dslBindIdx < dslBindCnt; dslBindIdx++) { auto* dslBind = dsLayout->getBindingAt(dslBindIdx); @@ -701,31 +695,20 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl uint32_t elemCnt = dslBind->getDescriptorCount(descSet); for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) { uint32_t descIdx = dslBind->getDescriptorIndex(elemIdx); - bool argBuffDirty = argBuffDirtyDescs.getBit(descIdx, true); - bool resourceUsageDirty = resourceUsageDirtyDescs.getBit(descIdx, true); - if (argBuffDirty || resourceUsageDirty) { - // Don't attach the arg buffer to the arg encoder unless something actually needs - // to be written to it. We often might only be updating command encoder resource usage. 
- if (argBuffDirty) { - mvkArgBuffEnc.setArgumentBuffer(descSet->getMetalArgumentBuffer(), - descSet->getMetalArgumentBufferOffset()); - } + if (resourceUsageDirtyDescs.getBit(descIdx, true)) { auto* mvkDesc = descSet->getDescriptorAt(descIdx); - mvkDesc->encodeToMetalArgumentBuffer(this, mvkArgBuffEnc, - dsIdx, dslBind, elemIdx, - stage, argBuffDirty, true); + mvkDesc->encodeResourceUsage(this, dslBind, stage); } } } } - mvkArgBuffEnc.setArgumentBuffer(nil, 0); - mvkArgEnc.unlock(); // If it is needed, bind the Metal argument buffer itself to the command encoder, if (shouldBindArgBuffToStage) { + auto& mvkArgBuff = descSet->getMetalArgumentBuffer(); MVKMTLBufferBinding bb; - bb.mtlBuffer = descSet->getMetalArgumentBuffer(); - bb.offset = descSet->getMetalArgumentBufferOffset(); + bb.mtlBuffer = mvkArgBuff.getMetalArgumentBuffer(); + bb.offset = mvkArgBuff.getMetalArgumentBufferOffset(); bb.index = dsIdx; bindMetalArgumentBuffer(stage, bb); } @@ -734,7 +717,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // the contents of Metal argument buffers. Triggering an extraction of the arg buffer // contents here, after filling it, seems to correct that. // Sigh. A bug report has been filed with Apple. - if (getDevice()->isCurrentlyAutoGPUCapturing()) { [descSet->getMetalArgumentBuffer() contents]; } + if (getDevice()->isCurrentlyAutoGPUCapturing()) { [descSet->getMetalArgumentBuffer().getMetalArgumentBuffer() contents]; } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h index 91e3ea7c9..755556c28 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h @@ -26,7 +26,10 @@ class MVKDescriptorSet; class MVKDescriptorSetLayout; class MVKCommandEncoder; class MVKResourcesCommandEncoderState; -struct MVKMTLArgumentEncoder; + + +// The size of one Metal3 Argument Buffer slot in bytes. 
+static const size_t kMVKMetal3ArgBuffSlotSizeInBytes = sizeof(uint64_t); #pragma mark MVKShaderStageResourceBinding @@ -110,7 +113,7 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr) const; /** Returns the descriptor type of this layout. */ - inline VkDescriptorType getDescriptorType() { return _info.descriptorType; } + VkDescriptorType getDescriptorType() { return _info.descriptorType; } /** Returns whether this binding uses immutable samplers. */ bool usesImmutableSamplers() { return !_immutableSamplers.empty(); } @@ -169,6 +172,7 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { protected: friend class MVKDescriptorSetLayout; + friend class MVKDescriptorSet; friend class MVKInlineUniformBlockDescriptor; void initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage); @@ -182,6 +186,8 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { MVKShaderResourceBinding& dslMTLRezIdxOffsets, uint32_t dslIndex); bool validate(MVKSampler* mvkSampler); + void encodeImmutableSamplersToMetalArgumentBuffer(MVKDescriptorSet* mvkDescSet); + uint32_t getResourceCount(); MVKDescriptorSetLayout* _layout; VkDescriptorSetLayoutBinding _info; @@ -196,23 +202,6 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { #pragma mark - #pragma mark MVKDescriptor -/** - * Helper object to handle the placement of resources into a Metal Argument Buffer - * in a consistent manner, whether or not a MTLArgumentEncoder is required. 
- */ -typedef struct MVKArgumentBufferEncoder { - void setArgumentBuffer(id mtlArgBuff, NSUInteger mtlArgBuffOfst); - void setBuffer(id mtlBuff, NSUInteger offset, uint32_t index); - void setTexture(id mtlTex, uint32_t index); - void setSamplerState(id mtlSamp, uint32_t index); - MVKArgumentBufferEncoder(MVKMTLArgumentEncoder& mvkMTLArgEnc); -protected: - void* getArgumentPointer(uint32_t index) const; - id _mtlArgumentEncoder = nil; - id _mtlArgumentBuffer = nil; - NSUInteger _mtlArgumentBufferOffset = 0; -} MVKArgumentBufferEncoder; - /** Represents a Vulkan descriptor. */ class MVKDescriptor : public MVKBaseObject { @@ -236,16 +225,6 @@ class MVKDescriptor : public MVKBaseObject { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) = 0; - /** Encodes this descriptor to the Metal argument buffer. */ - virtual void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) = 0; - /** * Updates the internal binding from the specified content. The format of the content depends * on the descriptor type, and is extracted from pData at the location given by index * stride. @@ -253,8 +232,9 @@ class MVKDescriptor : public MVKBaseObject { */ virtual void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t index, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) = 0; /** @@ -276,6 +256,11 @@ class MVKDescriptor : public MVKBaseObject { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) = 0; + /** Encodes the usage of this resource to the Metal command encoder. 
*/ + virtual void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) = 0; + /** Resets any internal content. */ virtual void reset() {} @@ -303,19 +288,11 @@ class MVKBufferDescriptor : public MVKDescriptor { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -326,6 +303,10 @@ class MVKBufferDescriptor : public MVKDescriptor { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override; + void reset() override; ~MVKBufferDescriptor() { reset(); } @@ -393,19 +374,12 @@ class MVKInlineUniformBlockDescriptor : public MVKDescriptor { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t dstOffset, // For inline buffers we are using this parameter as dst offset not as src descIdx - size_t stride, +// uint32_t dstOffset, 
// For inline buffers we are using this parameter as dst offset not as src descIdx + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -415,7 +389,11 @@ class MVKInlineUniformBlockDescriptor : public MVKDescriptor { VkDescriptorBufferInfo* pBufferInfo, VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; - + + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override; + void reset() override; ~MVKInlineUniformBlockDescriptor() { reset(); } @@ -443,19 +421,11 @@ class MVKImageDescriptor : public MVKDescriptor { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -466,6 +436,10 @@ class MVKImageDescriptor : public MVKDescriptor { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override; + void reset() override; ~MVKImageDescriptor() { reset(); } @@ -524,18 +498,11 @@ class MVKSamplerDescriptorMixin { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex); - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& 
mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer); - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData); void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -546,6 +513,10 @@ class MVKSamplerDescriptorMixin { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock); + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) {} + void reset(); ~MVKSamplerDescriptorMixin() { reset(); } @@ -572,19 +543,11 @@ class MVKSamplerDescriptor : public MVKDescriptor, public MVKSamplerDescriptorMi MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -595,6 +558,10 @@ class MVKSamplerDescriptor : public MVKDescriptor, public MVKSamplerDescriptorMi VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override {} + void reset() override; ~MVKSamplerDescriptor() { reset(); } @@ -620,19 +587,11 @@ class MVKCombinedImageSamplerDescriptor : public 
MVKImageDescriptor, public MVKS MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -643,6 +602,10 @@ class MVKCombinedImageSamplerDescriptor : public MVKImageDescriptor, public MVKS VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; + void encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override; + void reset() override; ~MVKCombinedImageSamplerDescriptor() { reset(); } @@ -666,19 +629,11 @@ class MVKTexelBufferDescriptor : public MVKDescriptor { MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) override; - void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) override; - void write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) override; void read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -689,6 +644,10 @@ class MVKTexelBufferDescriptor : public MVKDescriptor { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override; + void 
encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) override; + void reset() override; ~MVKTexelBufferDescriptor() { reset(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index 6751c3277..caa75684e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -114,6 +114,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s VkDescriptorType descType, MVKSampler* immutableSampler, bool usingNativeTextureAtomics) { + if (count == 0) { return; } #define addResourceBinding(spvRezType) \ do { \ @@ -490,19 +491,46 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s [args addObject: argDesc]; } +uint32_t MVKDescriptorSetLayoutBinding::getResourceCount() { + switch (getDescriptorType()) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return 2; + + default: + return 1; + } +} + +// Encodes an immutable sampler to the Metal argument buffer. +void MVKDescriptorSetLayoutBinding::encodeImmutableSamplersToMetalArgumentBuffer(MVKDescriptorSet* mvkDescSet) { + if ( !mvkDescSet->isUsingMetalArgumentBuffer() ) { return; } + + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + size_t sCnt = _immutableSamplers.size(); + for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { + MVKSampler* mvkSamp = _immutableSamplers[sIdx]; + id mtlSamp = (mvkSamp + ? 
mvkSamp->getMTLSamplerState() + : getDevice()->getDefaultMTLSamplerState()); + uint32_t argIdx = getMetalResourceIndexOffsets().samplerIndex + sIdx; + mvkArgBuff.setSamplerState(mtlSamp, argIdx); + } +} + void MVKDescriptorSetLayoutBinding::populateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& shaderConfig, MVKShaderResourceBinding& dslMTLRezIdxOffsets, uint32_t dslIndex) { - + uint32_t descCnt = getDescriptorCount(); + bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer(); MVKSampler* mvkSamp = !_immutableSamplers.empty() ? _immutableSamplers.front() : nullptr; - // Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding. + // Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding. MVKShaderResourceBinding mtlIdxs = _mtlResourceIndexOffsets + dslMTLRezIdxOffsets; - uint32_t descCnt = getDescriptorCount(); - bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer(); for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) { - if ((_applyToStage[stage] || isUsingMtlArgBuff) && descCnt > 0) { + if (_applyToStage[stage] || isUsingMtlArgBuff) { mvkPopulateShaderConversionConfig(shaderConfig, mtlIdxs.stages[stage], MVKShaderStage(stage), @@ -703,46 +731,6 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s #pragma mark - #pragma mark MVKDescriptor -void MVKArgumentBufferEncoder::setArgumentBuffer(id mtlArgBuff, NSUInteger mtlArgBuffOfst) { - if (_mtlArgumentEncoder && mtlArgBuff != _mtlArgumentBuffer) { - [_mtlArgumentEncoder setArgumentBuffer: mtlArgBuff offset: mtlArgBuffOfst]; - } - _mtlArgumentBuffer = mtlArgBuff; - _mtlArgumentBufferOffset = mtlArgBuffOfst; -} - -void MVKArgumentBufferEncoder::setBuffer(id mtlBuff, NSUInteger offset, uint32_t index) { - if (_mtlArgumentEncoder) { - [_mtlArgumentEncoder setBuffer: mtlBuff offset: offset atIndex: index]; - } else { - *(uint64_t*)getArgumentPointer(index) = 
mtlBuff.gpuAddress + offset; - } -} - -void MVKArgumentBufferEncoder::setTexture(id mtlTex, uint32_t index) { - if (_mtlArgumentEncoder) { - [_mtlArgumentEncoder setTexture: mtlTex atIndex: index]; - } else { - *(MTLResourceID*)getArgumentPointer(index) = mtlTex.gpuResourceID; - } -} - -void MVKArgumentBufferEncoder::setSamplerState(id mtlSamp, uint32_t index) { - if (_mtlArgumentEncoder) { - [_mtlArgumentEncoder setSamplerState: mtlSamp atIndex: index]; - } else { - *(MTLResourceID*)getArgumentPointer(index) = mtlSamp.gpuResourceID; - } -} - -// Returns the address of the slot at the index within the Metal argument buffer. -// This is based on the Metal 3 design that all arg buffer slots are 64 bits. -void* MVKArgumentBufferEncoder::getArgumentPointer(uint32_t index) const { - return (void*)((uintptr_t)_mtlArgumentBuffer.contents + _mtlArgumentBufferOffset + (index * sizeof(uint64_t))); -} - -MVKArgumentBufferEncoder::MVKArgumentBufferEncoder(MVKMTLArgumentEncoder& mvkMTLArgEnc) : _mtlArgumentEncoder(mvkMTLArgEnc._mtlArgumentEncoder) {} - MTLResourceUsage MVKDescriptor::getMTLResourceUsage() { MTLResourceUsage mtlUsage = MTLResourceUsageRead; switch (getDescriptorType()) { @@ -796,40 +784,30 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -void MVKBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex; - mvkArgBuffEnc.setBuffer(_mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil, - _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0, - argIdx); - } - if (encodeUsage) { - id mtlBuffer = _mvkBuffer ? 
_mvkBuffer->getMTLBuffer() : nil; - rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } -} - void MVKBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { auto* oldBuff = _mvkBuffer; - const auto* pBuffInfo = &get(pData, stride, srcIndex); + const auto* pBuffInfo = &get(pData, srcStride, srcIdx); _mvkBuffer = (MVKBuffer*)pBuffInfo->buffer; _buffOffset = pBuffInfo->offset; _buffRange = pBuffInfo->range; if (_mvkBuffer) { _mvkBuffer->retain(); } if (oldBuff) { oldBuff->release(); } + + // Write resource to Metal argument buffer + if (mvkDescSet->isUsingMetalArgumentBuffer()) { + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + dstIdx; + mvkArgBuff.setBuffer(_mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil, + _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0, + argIdx); + } } void MVKBufferDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -853,6 +831,13 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKDescriptor::reset(); } +void MVKBufferDescriptor::encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) { + id mtlBuffer = _mvkBuffer ? 
_mvkBuffer->getMTLBuffer() : nil; + rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); +} + #pragma mark - #pragma mark MVKInlineUniformBlockDescriptor @@ -881,31 +866,16 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -void MVKInlineUniformBlockDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex; - mvkArgBuffEnc.setBuffer(_mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil, - _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0, - argIdx); - } - if (encodeUsage) { - id mtlBuffer = _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil; - rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } -} - void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t dstOffset, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { + + // For inline buffers the dstIdx is actually a dstOffset + uint32_t dstOffset = dstIdx; + // Ensure there is a destination to write to uint32_t buffSize = mvkDSLBind->_info.descriptorCount; if ( !_mvkMTLBufferAllocation ) { _mvkMTLBufferAllocation = mvkDescSet->acquireMTLBufferRegion(buffSize); } @@ -916,6 +886,15 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s uint32_t dataLen = std::min(pInlineUniformBlock.dataSize, buffSize - dstOffset); memcpy(data + dstOffset, pInlineUniformBlock.pData, dataLen); } + + // Write resource to Metal argument buffer + if 
(mvkDescSet->isUsingMetalArgumentBuffer()) { + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex; + mvkArgBuff.setBuffer(_mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil, + _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0, + argIdx); + } } void MVKInlineUniformBlockDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -933,6 +912,13 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } +void MVKInlineUniformBlockDescriptor::encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) { + id mtlBuffer = _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil; + rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); +} + void MVKInlineUniformBlockDescriptor::reset() { if (_mvkMTLBufferAllocation) { _mvkMTLBufferAllocation->returnToPool(); } _mvkMTLBufferAllocation = nullptr; @@ -985,56 +971,43 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - VkDescriptorType descType = getDescriptorType(); - uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1; - - for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) { - uint32_t planeDescIdx = (elementIndex * planeCount) + planeIndex; - - id mtlTexture = _mvkImageView ? 
_mvkImageView->getMTLTexture(planeIndex) : nil; - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx; - mvkArgBuffEnc.setTexture(mtlTexture, argIdx); - } - if (encodeUsage) { - rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { - id mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture; - id mtlBuff = mtlTex.buffer; - if (mtlBuff) { - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + planeDescIdx; - mvkArgBuffEnc.setBuffer(mtlBuff, mtlTex.bufferOffset, argIdx); - } - if (encodeUsage) { - rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } - } - } - } -} - void MVKImageDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { auto* oldImgView = _mvkImageView; - const auto* pImgInfo = &get(pData, stride, srcIndex); + const auto* pImgInfo = &get(pData, srcStride, srcIdx); _mvkImageView = (MVKImageView*)pImgInfo->imageView; if (_mvkImageView) { _mvkImageView->retain(); } if (oldImgView) { oldImgView->release(); } + + // Write resource to Metal argument buffer + if (mvkDescSet->isUsingMetalArgumentBuffer()) { + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + VkDescriptorType descType = getDescriptorType(); + + uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1; + for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) { + uint32_t planeDescIdx = (dstIdx * planeCount) + planeIndex; + + id mtlTexture = _mvkImageView ? 
_mvkImageView->getMTLTexture(planeIndex) : nil; + uint32_t texArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx; + mvkArgBuff.setTexture(mtlTexture, texArgIdx); + + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + id mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture; + id mtlBuff = mtlTex.buffer; + if (mtlBuff) { + uint32_t buffArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + planeDescIdx; + mvkArgBuff.setBuffer(mtlBuff, mtlTex.bufferOffset, buffArgIdx); + } + } + } + } } void MVKImageDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1049,6 +1022,25 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s imgInfo.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } +void MVKImageDescriptor::encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) { + VkDescriptorType descType = getDescriptorType(); + uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1; + for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) { + id mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil; + rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + id mtlTex = mtlTexture.parentTexture ? 
mtlTexture.parentTexture : mtlTexture; + id mtlBuff = mtlTex.buffer; + if (mtlBuff) { + rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + } + } + } +} + void MVKImageDescriptor::reset() { if (_mvkImageView) { _mvkImageView->release(); } _mvkImageView = nullptr; @@ -1086,37 +1078,18 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -// Metal validation requires each sampler in an array of samplers to be populated, -// even if not used, so populate a default if one hasn't been set. -void MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer) { - if (encodeToArgBuffer) { - MVKSampler* imutSamp = mvkDSLBind->getImmutableSampler(elementIndex); - MVKSampler* mvkSamp = imutSamp ? imutSamp : _mvkSampler; - id mtlSamp = (mvkSamp - ? 
mvkSamp->getMTLSamplerState() - : mvkDSLBind->getDevice()->getDefaultMTLSamplerState()); - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().samplerIndex + elementIndex; - mvkArgBuffEnc.setSamplerState(mtlSamp, argIdx); - } -} - void MVKSamplerDescriptorMixin::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { if (mvkDSLBind->usesImmutableSamplers()) { return; } auto* oldSamp = _mvkSampler; - const auto* pImgInfo = &get(pData, stride, srcIndex); + const auto* pImgInfo = &get(pData, srcStride, srcIdx); _mvkSampler = (MVKSampler*)pImgInfo->sampler; if (_mvkSampler && _mvkSampler->getRequiresConstExprSampler()) { _mvkSampler->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkUpdateDescriptorSets(): Tried to push an immutable sampler."); @@ -1124,6 +1097,18 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (_mvkSampler) { _mvkSampler->retain(); } if (oldSamp) { oldSamp->release(); } + + // Write resource to Metal argument buffer + if (mvkDescSet->isUsingMetalArgumentBuffer()) { + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + MVKSampler* imutSamp = mvkDSLBind->getImmutableSampler(dstIdx); + MVKSampler* mvkSamp = imutSamp ? imutSamp : _mvkSampler; + id mtlSamp = (mvkSamp + ? 
mvkSamp->getMTLSamplerState() + : mvkDSLBind->getDevice()->getDefaultMTLSamplerState()); + uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().samplerIndex + dstIdx; + mvkArgBuff.setSamplerState(mtlSamp, argIdx); + } } void MVKSamplerDescriptorMixin::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1158,23 +1143,13 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKSamplerDescriptorMixin::bind(cmdEncoder, pipelineBindPoint, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex); } -void MVKSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); -} - void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { - MVKSamplerDescriptorMixin::write(mvkDSLBind, mvkDescSet, srcIndex, stride, pData); + MVKSamplerDescriptorMixin::write(mvkDSLBind, mvkDescSet, dstIdx, srcIdx, srcStride, pData); } void MVKSamplerDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1209,25 +1184,14 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKSamplerDescriptorMixin::bind(cmdEncoder, pipelineBindPoint, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex); } -void MVKCombinedImageSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - MVKDescriptorSetLayoutBinding* mvkDSLBind, - 
uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - MVKImageDescriptor::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer, encodeUsage); - MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mvkArgBuffEnc, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer); -} - void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { - MVKImageDescriptor::write(mvkDSLBind, mvkDescSet, srcIndex, stride, pData); - MVKSamplerDescriptorMixin::write(mvkDSLBind, mvkDescSet, srcIndex, stride, pData); + MVKImageDescriptor::write(mvkDSLBind, mvkDescSet, dstIdx, srcIdx, srcStride, pData); + MVKSamplerDescriptorMixin::write(mvkDSLBind, mvkDescSet, dstIdx, srcIdx, srcStride, pData); } void MVKCombinedImageSamplerDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1241,6 +1205,12 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKSamplerDescriptorMixin::read(mvkDSLBind, mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock); } +void MVKCombinedImageSamplerDescriptor::encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) { + MVKImageDescriptor::encodeResourceUsage(rezEncState, mvkDSLBind, stage); +} + void MVKCombinedImageSamplerDescriptor::reset() { MVKSamplerDescriptorMixin::reset(); MVKImageDescriptor::reset(); @@ -1282,50 +1252,37 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } } -void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState, - MVKArgumentBufferEncoder& mvkArgBuffEnc, - uint32_t descSetIndex, - 
MVKDescriptorSetLayoutBinding* mvkDSLBind, - uint32_t elementIndex, - MVKShaderStage stage, - bool encodeToArgBuffer, - bool encodeUsage) { - VkDescriptorType descType = getDescriptorType(); - id mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil; - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + elementIndex; - mvkArgBuffEnc.setTexture(mtlTexture, argIdx); - } - if (encodeUsage) { - rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } - - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { - id mtlBuff = mtlTexture.buffer; - if (mtlBuff) { - if (encodeToArgBuffer) { - uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex; - mvkArgBuffEnc.setBuffer(mtlBuff, mtlTexture.bufferOffset, argIdx); - } - if (encodeUsage) { - rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - } - } - } -} void MVKTexelBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t srcIndex, - size_t stride, + uint32_t dstIdx, + uint32_t srcIdx, + size_t srcStride, const void* pData) { auto* oldBuffView = _mvkBufferView; - const auto* pBuffView = &get(pData, stride, srcIndex); + const auto* pBuffView = &get(pData, srcStride, srcIdx); _mvkBufferView = (MVKBufferView*)*pBuffView; if (_mvkBufferView) { _mvkBufferView->retain(); } if (oldBuffView) { oldBuffView->release(); } + + // Write resource to Metal argument buffer + if (mvkDescSet->isUsingMetalArgumentBuffer()) { + auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); + VkDescriptorType descType = getDescriptorType(); + id mtlTexture = _mvkBufferView ? 
_mvkBufferView->getMTLTexture() : nil; + uint32_t texArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + dstIdx; + mvkArgBuff.setTexture(mtlTexture, texArgIdx); + + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + id mtlBuff = mtlTexture.buffer; + if (mtlBuff) { + uint32_t buffArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + dstIdx; + mvkArgBuff.setBuffer(mtlBuff, mtlTexture.bufferOffset, buffArgIdx); + } + } + } } void MVKTexelBufferDescriptor::read(MVKDescriptorSetLayoutBinding* mvkDSLBind, @@ -1338,6 +1295,21 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s pTexelBufferView[dstIndex] = (VkBufferView)_mvkBufferView; } +void MVKTexelBufferDescriptor::encodeResourceUsage(MVKResourcesCommandEncoderState* rezEncState, + MVKDescriptorSetLayoutBinding* mvkDSLBind, + MVKShaderStage stage) { + VkDescriptorType descType = getDescriptorType(); + id mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil; + rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + id mtlBuff = mtlTexture.buffer; + if (mtlBuff) { + rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + } + } +} + void MVKTexelBufferDescriptor::reset() { if (_mvkBufferView) { _mvkBufferView->release(); } _mvkBufferView = nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index a26e5531a..d3d7c422a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -29,40 +29,33 @@ class MVKDescriptorPool; class MVKPipelineLayout; class MVKCommandEncoder; class MVKResourcesCommandEncoderState; -struct MVKArgumentBufferEncoder; #pragma mark - -#pragma mark MVKDescriptorSetLayout +#pragma mark MVKMetalArgumentBuffer /** - * Holds and 
manages the lifecycle of a MTLArgumentEncoder, including locking access to the encoder, - * since it can only be used to encode to one argument buffer at a time. The internal MTLArgumentEncoder - * will be nil if the platform supports populating Metal argument buffers without a MTLArgumentEncoder. - * - * Because of the internal mutex, this encoder can only be initialized once, and because - * mutexes cannot be copied, copying this object results in an uninitialized empty object. + * Helper object to handle the placement of resources into a Metal Argument Buffer + * in a consistent manner, whether or not a MTLArgumentEncoder is required. */ -struct MVKMTLArgumentEncoder { - NSUInteger getEncodedLength() { return _mtlArgumentEncoder.encodedLength; } - void lock() { if (_mtlArgumentEncoder) { _mtlArgumentEncodingLock.lock(); } } - void unlock() { if (_mtlArgumentEncoder) { _mtlArgumentEncodingLock.unlock(); } } - void init(id mtlArgEnc) { - assert( !_mtlArgumentEncoder ); - _mtlArgumentEncoder = mtlArgEnc; - } - - MVKMTLArgumentEncoder(const MVKMTLArgumentEncoder& other) {} - MVKMTLArgumentEncoder& operator=(const MVKMTLArgumentEncoder& other) { return *this; } - MVKMTLArgumentEncoder() {} - ~MVKMTLArgumentEncoder() { [_mtlArgumentEncoder release]; } - +typedef struct MVKMetalArgumentBuffer { + void setBuffer(id mtlBuff, NSUInteger offset, uint32_t index); + void setTexture(id mtlTex, uint32_t index); + void setSamplerState(id mtlSamp, uint32_t index); + id getMetalArgumentBuffer() { return _mtlArgumentBuffer; } + NSUInteger getMetalArgumentBufferOffset() { return _mtlArgumentBufferOffset; } + void setArgumentBuffer(id mtlArgBuff, NSUInteger mtlArgBuffOfst, id mtlArgEnc); + ~MVKMetalArgumentBuffer(); protected: - friend MVKArgumentBufferEncoder; - + void* getArgumentPointer(uint32_t index) const; id _mtlArgumentEncoder = nil; - std::mutex _mtlArgumentEncodingLock; -}; + id _mtlArgumentBuffer = nil; + NSUInteger _mtlArgumentBufferOffset = 0; +} MVKMetalArgumentBuffer; 
+ + +#pragma mark - +#pragma mark MVKDescriptorSetLayout /** Represents a Vulkan descriptor set layout. */ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { @@ -124,30 +117,30 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { /** Returns true if this layout is using a Metal argument buffer. */ bool isUsingMetalArgumentBuffer() { return isUsingDescriptorSetMetalArgumentBuffers() && !isPushDescriptorLayout(); }; - /** Returns the MTLArgumentEncoder for the descriptor set. */ - MVKMTLArgumentEncoder& getMVKMTLArgumentEncoder() { return _mvkMTLArgumentEncoder; } - MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo); + ~MVKDescriptorSetLayout(); + protected: friend class MVKDescriptorSetLayoutBinding; friend class MVKPipelineLayout; friend class MVKDescriptorSet; + friend class MVKDescriptorPool; void propagateDebugName() override {} uint32_t getDescriptorCount() { return _descriptorCount; } uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); } MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; } const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo); - void initMTLArgumentEncoder(); MVKSmallVector _bindings; std::unordered_map _bindingToIndex; - MVKMTLArgumentEncoder _mvkMTLArgumentEncoder; MVKShaderResourceBinding _mtlResourceCounts; - uint32_t _descriptorCount; - bool _isPushDescriptorLayout; + NSArray* _mtlArgumentEncoderArgs = nil; + uint64_t _mtlArgumentBufferEncodedSize = 0; + uint32_t _descriptorCount = 0; + bool _isPushDescriptorLayout = false; }; @@ -173,7 +166,7 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { /** Updates the resource bindings in this instance from the specified content. 
*/ template - void write(const DescriptorAction* pDescriptorAction, size_t stride, const void* pData); + void write(const DescriptorAction* pDescriptorAction, size_t srcStride, const void* pData); /** * Reads the resource bindings defined in the specified content @@ -187,17 +180,6 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { /** Returns an MTLBuffer region allocation. */ MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length); - /** - * Returns the Metal argument buffer to which resources are written, - * or return nil if Metal argument buffers are not being used. - */ - id getMetalArgumentBuffer(); - - /** Returns the offset into the Metal argument buffer to which resources are written. */ - NSUInteger getMetalArgumentBufferOffset() { return _metalArgumentBufferOffset; } - - /** Returns an array indicating the descriptors that have changed since the Metal argument buffer was last updated. */ - MVKBitArray& getMetalArgumentBufferDirtyDescriptors() { return _metalArgumentBufferDirtyDescriptors; } /** Returns the descriptor at an index. */ MVKDescriptor* getDescriptorAt(uint32_t descIndex) { return _descriptors[descIndex]; } @@ -211,6 +193,9 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { /** Returns true if this descriptor set is using a Metal argument buffer. */ bool isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); }; + /** Returns the argument buffer helper object used by this descriptor set. 
*/ + MVKMetalArgumentBuffer& getMetalArgumentBuffer() { return _argumentBuffer; } + MVKDescriptorSet(MVKDescriptorPool* pool); protected: @@ -227,8 +212,7 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { MVKDescriptorPool* _pool; MVKDescriptorSetLayout* _layout; MVKSmallVector _descriptors; - MVKBitArray _metalArgumentBufferDirtyDescriptors; - NSUInteger _metalArgumentBufferOffset; + MVKMetalArgumentBuffer _argumentBuffer; uint32_t _dynamicOffsetDescriptorCount; uint32_t _variableDescriptorCount; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index c6bb5eaf4..07a761a0c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -24,6 +24,54 @@ #include "MVKOSExtensions.h" +#pragma mark - +#pragma mark MVKMetalArgumentBuffer + +void MVKMetalArgumentBuffer::setArgumentBuffer(id mtlArgBuff, + NSUInteger mtlArgBuffOfst, + id mtlArgEnc) { + _mtlArgumentBuffer = mtlArgBuff; + _mtlArgumentBufferOffset = mtlArgBuffOfst; + + auto* oldArgEnc = _mtlArgumentEncoder; + _mtlArgumentEncoder = [mtlArgEnc retain]; // retained + [_mtlArgumentEncoder setArgumentBuffer: _mtlArgumentBuffer offset: _mtlArgumentBufferOffset]; + [oldArgEnc release]; +} + +void MVKMetalArgumentBuffer::setBuffer(id mtlBuff, NSUInteger offset, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setBuffer: mtlBuff offset: offset atIndex: index]; + } else { + *(uint64_t*)getArgumentPointer(index) = mtlBuff.gpuAddress + offset; + } +} + +void MVKMetalArgumentBuffer::setTexture(id mtlTex, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setTexture: mtlTex atIndex: index]; + } else { + *(MTLResourceID*)getArgumentPointer(index) = mtlTex.gpuResourceID; + } +} + +void MVKMetalArgumentBuffer::setSamplerState(id mtlSamp, uint32_t index) { + if (_mtlArgumentEncoder) { + [_mtlArgumentEncoder setSamplerState: mtlSamp atIndex: index]; + } else 
{ + *(MTLResourceID*)getArgumentPointer(index) = mtlSamp.gpuResourceID; + } +} + +// Returns the address of the slot at the index within the Metal argument buffer. +// This is based on the Metal 3 design that all arg buffer slots are 64 bits. +void* MVKMetalArgumentBuffer::getArgumentPointer(uint32_t index) const { + return (void*)((uintptr_t)_mtlArgumentBuffer.contents + _mtlArgumentBufferOffset + (index * kMVKMetal3ArgBuffSlotSizeInBytes)); +} + +MVKMetalArgumentBuffer::~MVKMetalArgumentBuffer() { [_mtlArgumentEncoder release]; } + + #pragma mark - #pragma mark MVKDescriptorSetLayout @@ -234,7 +282,6 @@ return bindInfo1.pBinding->binding < bindInfo2.pBinding->binding; }); - _descriptorCount = 0; _isPushDescriptorLayout = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); _bindings.reserve(bindCnt); @@ -245,7 +292,23 @@ _descriptorCount += _bindings.back().getDescriptorCount(); } - initMTLArgumentEncoder(); + if (isUsingMetalArgumentBuffer()) { + if (needsMetalArgumentBufferEncoders()) { + @autoreleasepool { + auto* mutableArgs = [[NSMutableArray alloc] initWithCapacity: _bindings.size()]; + for (auto& dslBind : _bindings) { + dslBind.addMTLArgumentDescriptors(mutableArgs); + } + _mtlArgumentEncoderArgs = mutableArgs; // retained + _mtlArgumentBufferEncodedSize = [[getMTLDevice() newArgumentEncoderWithArguments: _mtlArgumentEncoderArgs] autorelease].encodedLength; + } + } else { + for (auto& dslBind : _bindings) { + _mtlArgumentBufferEncodedSize += dslBind.getResourceCount() * kMVKMetal3ArgBuffSlotSizeInBytes; + } + } + } + } // Find and return an array of binding flags from the pNext chain of pCreateInfo, @@ -264,18 +327,8 @@ return nullptr; } -void MVKDescriptorSetLayout::initMTLArgumentEncoder() { - if (isUsingMetalArgumentBuffer() && needsMetalArgumentBufferEncoders()) { - @autoreleasepool { - NSMutableArray* args = [NSMutableArray arrayWithCapacity: _bindings.size()]; - for (auto& dslBind : _bindings) { - 
dslBind.addMTLArgumentDescriptors(args); - } - if (args.count) { - _mvkMTLArgumentEncoder.init([getMTLDevice() newArgumentEncoderWithArguments: args]); - } - } - } +MVKDescriptorSetLayout::~MVKDescriptorSetLayout() { + [_mtlArgumentEncoderArgs release]; } @@ -290,33 +343,24 @@ return _descriptors[_layout->getDescriptorIndex(binding, elementIndex)]; } -id MVKDescriptorSet::getMetalArgumentBuffer() { return _pool->_metalArgumentBuffer; } - template void MVKDescriptorSet::write(const DescriptorAction* pDescriptorAction, - size_t stride, + size_t srcStride, const void* pData) { -#define writeDescriptorAt(IDX) \ - do { \ - MVKDescriptor* mvkDesc = _descriptors[descIdx]; \ - if (mvkDesc->getDescriptorType() == descType) { \ - mvkDesc->write(mvkDSLBind, this, IDX, stride, pData); \ - _metalArgumentBufferDirtyDescriptors.setBit(descIdx); \ - } \ - } while(false) MVKDescriptorSetLayoutBinding* mvkDSLBind = _layout->getBinding(pDescriptorAction->dstBinding); VkDescriptorType descType = mvkDSLBind->getDescriptorType(); if (descType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { // For inline buffers dstArrayElement is a byte offset uint32_t descIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding); - writeDescriptorAt(pDescriptorAction->dstArrayElement); + _descriptors[descIdx]->write(mvkDSLBind, this, pDescriptorAction->dstArrayElement, 0, srcStride, pData); } else { uint32_t descStartIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding, pDescriptorAction->dstArrayElement); + uint32_t dstStartIdx = pDescriptorAction->dstArrayElement; uint32_t elemCnt = std::min(pDescriptorAction->descriptorCount, (uint32_t)_descriptors.size() - descStartIdx); for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) { uint32_t descIdx = descStartIdx + elemIdx; - writeDescriptorAt(elemIdx); + _descriptors[descIdx]->write(mvkDSLBind, this, dstStartIdx + elemIdx, elemIdx, srcStride, pData); } } } @@ -357,12 +401,15 @@ _layout = layout; _variableDescriptorCount = 
variableDescriptorCount; - // If the Metal argument buffer offset has not been set yet, set it now. - if ( !_metalArgumentBufferOffset ) { _metalArgumentBufferOffset = mtlArgBufferOffset; } + id mtlArgEnc = nil; + if (isUsingMetalArgumentBuffer() && needsMetalArgumentBufferEncoders()) { + mtlArgEnc = [getMTLDevice() newArgumentEncoderWithArguments: layout->_mtlArgumentEncoderArgs]; // temp retain + } + _argumentBuffer.setArgumentBuffer(_pool->_metalArgumentBuffer, mtlArgBufferOffset, mtlArgEnc); + [mtlArgEnc release]; // release temp retain uint32_t descCnt = layout->getDescriptorCount(); _descriptors.reserve(descCnt); - _metalArgumentBufferDirtyDescriptors.resize(descCnt); uint32_t bindCnt = (uint32_t)layout->_bindings.size(); for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) { @@ -370,15 +417,15 @@ uint32_t elemCnt = mvkDSLBind->getDescriptorCount(this); for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) { VkDescriptorType descType = mvkDSLBind->getDescriptorType(); - uint32_t descIdx = (uint32_t)_descriptors.size(); MVKDescriptor* mvkDesc = nullptr; setConfigurationResult(_pool->allocateDescriptor(descType, &mvkDesc)); if ( !wasConfigurationSuccessful() ) { return getConfigurationResult(); } if (mvkDesc->usesDynamicBufferOffsets()) { _dynamicOffsetDescriptorCount++; } - if (mvkDSLBind->usesImmutableSamplers()) { _metalArgumentBufferDirtyDescriptors.setBit(descIdx); } _descriptors.push_back(mvkDesc); } + mvkDSLBind->encodeImmutableSamplersToMetalArgumentBuffer(this); } + return getConfigurationResult(); } @@ -387,8 +434,7 @@ _dynamicOffsetDescriptorCount = 0; _variableDescriptorCount = 0; - // Only reset the Metal arg buffer offset if the entire pool is being reset - if (isPoolReset) { _metalArgumentBufferOffset = 0; } + if (isPoolReset) { _argumentBuffer.setArgumentBuffer(_pool->_metalArgumentBuffer, 0, nil); } // Pooled descriptors don't need to be individually freed under pool resets. 
if ( !(_pool->_hasPooledDescriptors && isPoolReset) ) { @@ -396,7 +442,6 @@ } _descriptors.clear(); _descriptors.shrink_to_fit(); - _metalArgumentBufferDirtyDescriptors.resize(0); clearConfigurationResult(); } @@ -493,15 +538,12 @@ uint32_t variableDescriptorCount, VkDescriptorSet* pVKDS) { VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY; - NSUInteger mtlArgBuffAllocSize = mvkDSL->getMVKMTLArgumentEncoder().getEncodedLength(); - NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize, - getMetalFeatures().mtlBufferAlignment); - + NSUInteger mtlArgBuffEncSize = mvkDSL->_mtlArgumentBufferEncodedSize; size_t dsCnt = _descriptorSetAvailablility.size(); _descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) { bool isSpaceAvail = true; // If not using Metal arg buffers, space will always be available. MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx]; - NSUInteger mtlArgBuffOffset = mvkDS->_metalArgumentBufferOffset; + NSUInteger mtlArgBuffOffset = mvkDS->getMetalArgumentBuffer().getMetalArgumentBufferOffset(); // If the desc set is using a Metal argument buffer, we also need to see if the desc set // will fit in the slot that might already have been allocated for it in the Metal argument @@ -513,16 +555,16 @@ // on a reset pool), set the offset and update the next available offset value. if ( !mtlArgBuffOffset && (dsIdx || !_nextMetalArgumentBufferOffset)) { mtlArgBuffOffset = _nextMetalArgumentBufferOffset; - _nextMetalArgumentBufferOffset += mtlArgBuffAlignedSize; + _nextMetalArgumentBufferOffset += mtlArgBuffEncSize; } // Get the offset of the next desc set, if one exists and // its offset has been set, or the end of the arg buffer. size_t nextDSIdx = dsIdx + 1; - NSUInteger nextOffset = (nextDSIdx < dsCnt ? _descriptorSets[nextDSIdx]._metalArgumentBufferOffset : 0); + NSUInteger nextOffset = (nextDSIdx < dsCnt ? 
_descriptorSets[nextDSIdx].getMetalArgumentBuffer().getMetalArgumentBufferOffset() : 0); if ( !nextOffset ) { nextOffset = _metalArgumentBuffer.length; } - isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffAllocSize) <= nextOffset; + isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffEncSize) <= nextOffset; } if (isSpaceAvail) { @@ -821,7 +863,7 @@ metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set } else { // For Metal 3, encoders are not required, and each arg buffer entry fits into 64 bits. - metalArgBuffSize = (mtlBuffCnt + mtlTexCnt + mtlSampCnt) * sizeof(uint64_t); + metalArgBuffSize = (mtlBuffCnt + mtlTexCnt + mtlSampCnt) * kMVKMetal3ArgBuffSlotSizeInBytes; } if (metalArgBuffSize) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 92adaf7c2..8adcf84b5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -4880,6 +4880,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS || (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING && (_enabledVulkan12FeaturesNoExt.descriptorIndexing || _enabledExtensions.vk_EXT_descriptor_indexing.enabled)))); + MVKLogInfoIf(getMVKConfig().debugMode, "Descriptor sets binding resources using %s.", _isUsingDescriptorSetMetalArgumentBuffers ? "Metal argument buffers" : "discrete resource indexes"); _commandResourceFactory = new MVKCommandResourceFactory(this); From eebd82604c516e3c7be04f11a37c173ec7bee407 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Thu, 6 Jun 2024 10:35:40 -0400 Subject: [PATCH 4/7] Fixes to Metal3 argument buffers. - Include descriptor element count when calculating Metal3 argument buffer encoded size. 
- Support MVKPhysicalDeviceMetalFeatures::nativeTextureAtomics in argument buffer operations. - Add ability to log descriptor set layout. - Support debug logging alongside other logging levels. --- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h | 10 +++- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 51 +++++++++++++++---- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 1 + .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 17 ++++++- MoltenVK/MoltenVK/Utility/MVKLogging.h | 2 +- 5 files changed, 68 insertions(+), 13 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h index 755556c28..5afd3aa4b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h @@ -187,7 +187,8 @@ class MVKDescriptorSetLayoutBinding : public MVKBaseDeviceObject { uint32_t dslIndex); bool validate(MVKSampler* mvkSampler); void encodeImmutableSamplersToMetalArgumentBuffer(MVKDescriptorSet* mvkDescSet); - uint32_t getResourceCount(); + uint32_t getResourceCountPerElement(); + uint64_t getMetalArgumentBufferEncodedSize(); MVKDescriptorSetLayout* _layout; VkDescriptorSetLayoutBinding _info; @@ -673,3 +674,10 @@ class MVKStorageTexelBufferDescriptor : public MVKTexelBufferDescriptor { public: VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; } }; + + +#pragma mark - +#pragma mark Support functions + +/** Returns the name of the descriptor type. 
*/ +const char* mvkVkDescriptorTypeName(VkDescriptorType vkDescType); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index caa75684e..6c0d8b02d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -491,18 +491,22 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s [args addObject: argDesc]; } -uint32_t MVKDescriptorSetLayoutBinding::getResourceCount() { - switch (getDescriptorType()) { - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: +uint32_t MVKDescriptorSetLayoutBinding::getResourceCountPerElement() { + switch (_info.descriptorType) { case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: return 2; - + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return getMetalFeatures().nativeTextureAtomics ? 1 : 2; default: return 1; } } +uint64_t MVKDescriptorSetLayoutBinding::getMetalArgumentBufferEncodedSize() { + return getResourceCountPerElement() * getDescriptorCount() * kMVKMetal3ArgBuffSlotSizeInBytes; +} + // Encodes an immutable sampler to the Metal argument buffer. void MVKDescriptorSetLayoutBinding::encodeImmutableSamplersToMetalArgumentBuffer(MVKDescriptorSet* mvkDescSet) { if ( !mvkDescSet->isUsingMetalArgumentBuffer() ) { return; } @@ -998,7 +1002,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s uint32_t texArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx; mvkArgBuff.setTexture(mtlTexture, texArgIdx); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { id mtlTex = mtlTexture.parentTexture ? 
mtlTexture.parentTexture : mtlTexture; id mtlBuff = mtlTex.buffer; if (mtlBuff) { @@ -1031,7 +1035,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s id mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil; rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { id mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture; id mtlBuff = mtlTex.buffer; if (mtlBuff) { @@ -1275,7 +1279,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s uint32_t texArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + dstIdx; mvkArgBuff.setTexture(mtlTexture, texArgIdx); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { id mtlBuff = mtlTexture.buffer; if (mtlBuff) { uint32_t buffArgIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + dstIdx; @@ -1302,7 +1306,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s id mtlTexture = _mvkBufferView ? 
_mvkBufferView->getMTLTexture() : nil; rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getMetalFeatures().nativeTextureAtomics) { id mtlBuff = mtlTexture.buffer; if (mtlBuff) { rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); @@ -1315,3 +1319,32 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s _mvkBufferView = nullptr; MVKDescriptor::reset(); } + + +#pragma mark - +#pragma mark Support functions + +#define CASE_STRINGIFY(V) case V: return #V + +const char* mvkVkDescriptorTypeName(VkDescriptorType vkDescType) { + switch (vkDescType) { + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_SAMPLER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM); + CASE_STRINGIFY(VK_DESCRIPTOR_TYPE_MUTABLE_EXT); + default: return "VK_UNKNOWN_VkDescriptorType"; + } +} diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index d3d7c422a..1aa48dac9 
100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -133,6 +133,7 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); } MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; } const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo); + std::string getLogDescription(); MVKSmallVector _bindings; std::unordered_map _bindingToIndex; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index 07a761a0c..6ff53df29 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -22,6 +22,7 @@ #include "MVKPipeline.h" #include "MVKInstance.h" #include "MVKOSExtensions.h" +#include #pragma mark - @@ -304,11 +305,12 @@ } } else { for (auto& dslBind : _bindings) { - _mtlArgumentBufferEncodedSize += dslBind.getResourceCount() * kMVKMetal3ArgBuffSlotSizeInBytes; + _mtlArgumentBufferEncodedSize += dslBind.getMetalArgumentBufferEncodedSize(); } } } + MVKLogDebugIf(getMVKConfig().debugMode, "Created %s\n", getLogDescription().c_str()); } // Find and return an array of binding flags from the pNext chain of pCreateInfo, @@ -327,6 +329,17 @@ return nullptr; } +std::string MVKDescriptorSetLayout::getLogDescription() { + std::stringstream logMsgOut; + logMsgOut << "VkDescriptorSetLayout with " << _bindings.size() << " descriptors:"; + for (auto& dlb : _bindings) { + logMsgOut << "\n\t\t" << dlb.getDescriptorIndex() << ": "; + logMsgOut << mvkVkDescriptorTypeName(dlb.getDescriptorType()); + logMsgOut << " with " << dlb.getDescriptorCount() << " bindings."; + } + return logMsgOut.str(); +} + MVKDescriptorSetLayout::~MVKDescriptorSetLayout() { 
[_mtlArgumentEncoderArgs release]; } @@ -873,7 +886,7 @@ metalArgBuffSize = maxMTLBuffSize; } _metalArgumentBuffer = [getMTLDevice() newBufferWithLength: metalArgBuffSize options: MTLResourceStorageModeShared]; // retained - _metalArgumentBuffer.label = @"Descriptor pool argument buffer"; + _metalArgumentBuffer.label = @"Descriptor set argument buffer"; } } } diff --git a/MoltenVK/MoltenVK/Utility/MVKLogging.h b/MoltenVK/MoltenVK/Utility/MVKLogging.h index 50fb4fed9..e55ea5115 100644 --- a/MoltenVK/MoltenVK/Utility/MVKLogging.h +++ b/MoltenVK/MoltenVK/Utility/MVKLogging.h @@ -128,7 +128,7 @@ extern "C" { # define MVK_LOG_LEVEL_INFO MVK_LOGGING_ENABLED #endif #ifndef MVK_LOG_LEVEL_DEBUG -# define MVK_LOG_LEVEL_DEBUG (MVK_LOGGING_ENABLED && MVK_CONFIG_DEBUG) +# define MVK_LOG_LEVEL_DEBUG MVK_LOGGING_ENABLED #endif #ifndef MVK_LOG_LEVEL_TRACE # define MVK_LOG_LEVEL_TRACE 0 From 30c5b92667a36ca3cb5ea6e7566fbda892354793 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 25 Jun 2024 17:53:09 -0400 Subject: [PATCH 5/7] Cleanup and refactor Metal argument buffers. - Update SPIRV-Cross version to include latest argument buffer fixes. - Disable CompilerMSL::Options::force_active_argument_buffer_resources. - Track OpArrayLength buffer-sizes buffer as an auxiliary buffer in each descriptor set argument buffer, as this is how SPIRV-Cross expects it. - Revert MVKConfiguration::useMetalArgumentBuffers and env var MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS to boolean value, because descriptor indexing value is no longer required. - Update max number of buffers and textures per stage to 1M. - Refactor and simplify calculating argument buffer resource indexes - Remove MVKShaderStageResourceBinding::resourceIndex, and track resource index incrementing locally. - Update MVKDescriptorSetLayout::_descriptorCount & _mtlResourceCount from within MVKDescriptorSetLayoutBinding.
- Refactor and simplify tracking argument buffer usage - Rename MVKDeviceTrackingMixin::isUsingDescriptorSetMetalArgumentBuffers() to isUsingMetalArgumentBuffers(), and allow it to be overridden in MVKDescriptorSetLayout. - Move and rename MVKDevice::_isUsingDescriptorSetMetalArgumentBuffers to MVKPhysicalDevice::_isUsingMetalArgumentBuffers. - Remove MVKPhysicalDevice::supportsDescriptorSetMetalArgumentBuffers(); - Remove MVKDescriptorSetLayout::isUsingMetalArgumentBuffer() and replace with overridden isUsingMetalArgumentBuffers() function. - Rename MVKDescriptorSet::isUsingMetalArgumentBuffer to hasMetalArgumentBuffer(). - Remove MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer(). - Remove MVKDescriptorSetLayout::isPushDescriptorLayout(). - Update debugging logging - Refactor and enhance ability to log descriptor set descriptions for debugging. - MVKPipelineLayout support debug logging and remove a few unused member functions. - Adjust whitespace in logging of instances and devices. - Update MVK_PRIVATE_API_VERSION to version 43. 
--- Docs/MoltenVK_Configuration_Parameters.md | 12 +- ExternalRevisions/SPIRV-Cross_repo_revision | 2 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 14 +- .../Commands/MVKCommandEncoderState.mm | 10 +- .../Commands/MVKMTLBufferAllocation.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h | 27 ++-- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 127 ++++++++++------- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 25 ++-- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 130 ++++++++++++++---- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 11 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 68 +++++---- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 13 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 34 +++-- .../MoltenVK/Utility/MVKConfigMembers.def | 2 +- MoltenVK/MoltenVK/Utility/MVKEnvironment.h | 2 +- .../MoltenVKShaderConverterTool.cpp | 2 +- README.md | 3 +- Scripts/runcts | 2 +- 20 files changed, 294 insertions(+), 198 deletions(-) diff --git a/Docs/MoltenVK_Configuration_Parameters.md b/Docs/MoltenVK_Configuration_Parameters.md index 85493125e..06cbf3033 100644 --- a/Docs/MoltenVK_Configuration_Parameters.md +++ b/Docs/MoltenVK_Configuration_Parameters.md @@ -616,22 +616,14 @@ cleared via a call to the `vkTrimCommandPoolKHR()` command. --------------------------------------- #### MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS -##### Type: Enumeration -- `0`: Don't use _Metal_ Argument Buffers. -- `1`: Use _Metal_ Argument Buffers for all pipelines. -- `2`: Use _Metal_ Argument Buffers only if the `VK_EXT_descriptor_indexing` extension is enabled. - -##### Default: `0` +##### Type: Boolean +##### Default: `1` Controls whether **MoltenVK** should use _Metal_ argument buffers for resources defined in descriptor sets, if _Metal_ argument buffers are supported on the platform. 
Using _Metal_ argument buffers dramatically increases the number of buffers, textures and samplers that can be bound to a pipeline shader, and in most cases improves performance. -_**NOTE:**_ Currently, _Metal_ argument buffer support is in beta stage, and is only supported on _macOS 11.0+_, -or on older versions of _macOS_ using an _Intel_ GPU. _Metal_ argument buffers support is not available on _iOS_ or _tvOS_. -Development to support _iOS_ and _tvOS_ and a wider combination of GPU's on older _macOS_ versions is under way. - --------------------------------------- #### MVK_CONFIG_USE_MTLHEAP diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index c4bc48232..45d3be61a 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -d47a140735cb44e511d0188a6318c365789e4699 +6fd1f75636b1c424b809ad8a84804654cf5ae48b diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 8a92e2453..4e7e41790 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 42 +#define MVK_PRIVATE_API_VERSION 43 #pragma mark - @@ -140,14 +140,6 @@ typedef enum MVKConfigAdvertiseExtensionBits { } MVKConfigAdvertiseExtensionBits; typedef VkFlags MVKConfigAdvertiseExtensions; -/** Identifies the use of Metal Argument Buffers. */ -typedef enum MVKUseMetalArgumentBuffers { - MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER = 0, /**< Don't use Metal Argument Buffers. */ - MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS = 1, /**< Use Metal Argument Buffers for all pipelines. */ - MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING = 2, /**< Use Metal Argument Buffers only if VK_EXT_descriptor_indexing extension is enabled. 
*/ - MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_MAX_ENUM = 0x7FFFFFFF -} MVKUseMetalArgumentBuffers; - /** Identifies the Metal functionality used to support Vulkan semaphore functionality (VkSemaphore). */ typedef enum MVKVkSemaphoreSupportStyle { MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE = 0, /**< Limit Vulkan to a single queue, with no explicit semaphore synchronization, and use Metal's implicit guarantees that all operations submitted to a queue will give the same result as if they had been run in submission order. */ @@ -240,7 +232,7 @@ typedef struct { uint32_t apiVersionToAdvertise; /**< MVK_CONFIG_API_VERSION_TO_ADVERTISE */ MVKConfigAdvertiseExtensions advertiseExtensions; /**< MVK_CONFIG_ADVERTISE_EXTENSIONS */ VkBool32 resumeLostDevice; /**< MVK_CONFIG_RESUME_LOST_DEVICE */ - MVKUseMetalArgumentBuffers useMetalArgumentBuffers; /**< MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS */ + VkBool32 useMetalArgumentBuffers; /**< MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS */ MVKConfigCompressionAlgorithm shaderSourceCompressionAlgorithm; /**< MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM */ VkBool32 shouldMaximizeConcurrentCompilation; /**< MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION */ float timestampPeriodLowPassAlpha; /**< MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA */ @@ -353,7 +345,7 @@ typedef struct { VkBool32 textureBarriers; /**< If true, texture barriers are supported within Metal render passes. Deprecated. Will always be false on all platforms. */ VkBool32 tileBasedDeferredRendering; /**< If true, this device uses tile-based deferred rendering. */ VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported on the platform. */ - VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffers can be used for descriptor sets. */ + VkBool32 descriptorSetArgumentBuffers; /**< If true, Metal argument buffers can be used for descriptor sets. 
*/ MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */ MVKCounterSamplingFlags counterSamplingPoints; /**< Identifies the points where pipeline GPU counter sampling may occur. */ VkBool32 programmableSamplePositions; /**< If true, programmable MSAA sample positions are supported. */ diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index eff18582c..c77373bf0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -651,7 +651,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl _boundDescriptorSets[descSetIndex] = descSet; - if (descSet->isUsingMetalArgumentBuffer()) { + if (descSet->hasMetalArgumentBuffer()) { // If the descriptor set has changed, track new resource usage. if (dsChanged) { auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex]; @@ -674,13 +674,13 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // Encode the Metal command encoder usage for each resource, // and bind the Metal argument buffer to the command encoder. 
void MVKResourcesCommandEncoderState::encodeMetalArgumentBuffer(MVKShaderStage stage) { - if ( !_cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers() ) { return; } + if ( !_cmdEncoder->isUsingMetalArgumentBuffers() ) { return; } MVKPipeline* pipeline = getPipeline(); uint32_t dsCnt = pipeline->getDescriptorSetCount(); for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { auto* descSet = _boundDescriptorSets[dsIdx]; - if ( !(descSet && descSet->isUsingMetalArgumentBuffer()) ) { continue; } + if ( !(descSet && descSet->hasMetalArgumentBuffer()) ) { continue; } auto* dsLayout = descSet->getLayout(); auto& resourceUsageDirtyDescs = _metalUsageDirtyDescriptors[dsIdx]; @@ -702,6 +702,8 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl } } } + descSet->encodeAuxBufferUsage(this, stage); + // If it is needed, bind the Metal argument buffer itself to the command encoder, if (shouldBindArgBuffToStage) { @@ -724,7 +726,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl // Mark the resource usage as needing an update for each Metal render encoder. void MVKResourcesCommandEncoderState::markDirty() { MVKCommandEncoderState::markDirty(); - if (_cmdEncoder->isUsingDescriptorSetMetalArgumentBuffers()) { + if (_cmdEncoder->isUsingMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) { _metalUsageDirtyDescriptors[dsIdx].setAllBits(); } diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 95f58fde0..de09f7b7c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -45,7 +45,7 @@ class MVKMTLBufferAllocation : public MVKBaseObject, public MVKLinkableMixin +#include + #define BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bind, pipelineBindPoint, stage, ...) 
\ do { \ @@ -38,7 +41,6 @@ rslt.bufferIndex = this->bufferIndex + rhs.bufferIndex; rslt.textureIndex = this->textureIndex + rhs.textureIndex; rslt.samplerIndex = this->samplerIndex + rhs.samplerIndex; - rslt.resourceIndex = this->resourceIndex + rhs.resourceIndex; rslt.dynamicOffsetBufferIndex = this->dynamicOffsetBufferIndex + rhs.dynamicOffsetBufferIndex; return rslt; } @@ -47,7 +49,6 @@ this->bufferIndex += rhs.bufferIndex; this->textureIndex += rhs.textureIndex; this->samplerIndex += rhs.samplerIndex; - this->resourceIndex += rhs.resourceIndex; this->dynamicOffsetBufferIndex += rhs.dynamicOffsetBufferIndex; return *this; } @@ -61,19 +62,15 @@ #pragma mark MVKShaderResourceBinding -uint16_t MVKShaderResourceBinding::getMaxResourceIndex() { - return std::max({stages[kMVKShaderStageVertex].resourceIndex, stages[kMVKShaderStageTessCtl].resourceIndex, stages[kMVKShaderStageTessEval].resourceIndex, stages[kMVKShaderStageFragment].resourceIndex, stages[kMVKShaderStageCompute].resourceIndex}); -} - -uint16_t MVKShaderResourceBinding::getMaxBufferIndex() { +uint32_t MVKShaderResourceBinding::getMaxBufferIndex() { return std::max({stages[kMVKShaderStageVertex].bufferIndex, stages[kMVKShaderStageTessCtl].bufferIndex, stages[kMVKShaderStageTessEval].bufferIndex, stages[kMVKShaderStageFragment].bufferIndex, stages[kMVKShaderStageCompute].bufferIndex}); } -uint16_t MVKShaderResourceBinding::getMaxTextureIndex() { +uint32_t MVKShaderResourceBinding::getMaxTextureIndex() { return std::max({stages[kMVKShaderStageVertex].textureIndex, stages[kMVKShaderStageTessCtl].textureIndex, stages[kMVKShaderStageTessEval].textureIndex, stages[kMVKShaderStageFragment].textureIndex, stages[kMVKShaderStageCompute].textureIndex}); } -uint16_t MVKShaderResourceBinding::getMaxSamplerIndex() { +uint32_t MVKShaderResourceBinding::getMaxSamplerIndex() { return std::max({stages[kMVKShaderStageVertex].samplerIndex, stages[kMVKShaderStageTessCtl].samplerIndex, 
stages[kMVKShaderStageTessEval].samplerIndex, stages[kMVKShaderStageFragment].samplerIndex, stages[kMVKShaderStageCompute].samplerIndex}); } @@ -101,7 +98,6 @@ void MVKShaderResourceBinding::addArgumentBuffers(uint32_t count) { for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) { stages[i].bufferIndex += count; - stages[i].resourceIndex += count; } } @@ -420,8 +416,6 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); }; - // Adds MTLArgumentDescriptors to the array, and updates resource indexes consumed. void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray* args) { switch (getDescriptorType()) { @@ -491,7 +485,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s [args addObject: argDesc]; } -uint32_t MVKDescriptorSetLayoutBinding::getResourceCountPerElement() { +uint32_t MVKDescriptorSetLayoutBinding::getMTLResourceCountPerElement() { switch (_info.descriptorType) { case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: return 2; @@ -503,13 +497,13 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } -uint64_t MVKDescriptorSetLayoutBinding::getMetalArgumentBufferEncodedSize() { - return getResourceCountPerElement() * getDescriptorCount() * kMVKMetal3ArgBuffSlotSizeInBytes; +uint64_t MVKDescriptorSetLayoutBinding::getMetal3ArgumentBufferEncodedSize() { + return getMTLResourceCountPerElement() * getDescriptorCount() * kMVKMetal3ArgBuffSlotSizeInBytes; } // Encodes an immutable sampler to the Metal argument buffer. 
void MVKDescriptorSetLayoutBinding::encodeImmutableSamplersToMetalArgumentBuffer(MVKDescriptorSet* mvkDescSet) { - if ( !mvkDescSet->isUsingMetalArgumentBuffer() ) { return; } + if ( !mvkDescSet->hasMetalArgumentBuffer() ) { return; } auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); size_t sCnt = _immutableSamplers.size(); @@ -527,7 +521,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKShaderResourceBinding& dslMTLRezIdxOffsets, uint32_t dslIndex) { uint32_t descCnt = getDescriptorCount(); - bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer(); + bool isUsingMtlArgBuff = _layout->isUsingMetalArgumentBuffers(); MVKSampler* mvkSamp = !_immutableSamplers.empty() ? _immutableSamplers.front() : nullptr; // Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding. @@ -580,16 +574,26 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s return mtlStages; } +std::string MVKDescriptorSetLayoutBinding::getLogDescription() { + uint32_t elemCnt = getDescriptorCount(); + std::stringstream descStr; + descStr << getDescriptorIndex() << ": "; + descStr << std::left << std::setw(46) << mvkVkDescriptorTypeName(getDescriptorType()) << std::setw(0); + descStr << "with " << (hasVariableDescriptorCount() ? 
"up to " : "") << elemCnt << " elements"; + descStr << " at binding " << getBinding(); + if (elemCnt == 0) { descStr << " (inactive)"; } + return descStr.str(); +} + MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(MVKDevice* device, MVKDescriptorSetLayout* layout, const VkDescriptorSetLayoutBinding* pBinding, - VkDescriptorBindingFlagsEXT bindingFlags, - uint32_t descriptorIndex) : + VkDescriptorBindingFlagsEXT bindingFlags) : MVKBaseDeviceObject(device), _layout(layout), _info(*pBinding), _flags(bindingFlags), - _descriptorIndex(descriptorIndex) { + _descriptorIndex(layout->_descriptorCount) { _info.pImmutableSamplers = nullptr; // Remove dangling pointer @@ -599,6 +603,14 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s initMetalResourceIndexOffsets(pBinding, stage); } + // Update descriptor set layout counts + uint32_t descCnt = getDescriptorCount(); + _layout->_descriptorCount += descCnt; + _layout->_mtlResourceCount += descCnt * getMTLResourceCountPerElement(); + if (needsBuffSizeAuxBuffer()) { + _layout->_maxBufferIndex = std::max(_layout->_maxBufferIndex, int32_t(_mtlResourceIndexOffsets.getMaxBufferIndex() + descCnt) - 1); + } + // If immutable samplers are defined, copy them in if ( pBinding->pImmutableSamplers && (pBinding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || @@ -640,26 +652,23 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage) { // Sets an index offset and updates both that index and the general resource index. - // Can be used multiply for combined multi-resource descriptor types. - // When using Metal argument buffers, we accumulate the resource indexes cummulatively, - // across all resource types, and do not increase the individual resources counts - // consumed by the descriptor set layout. 
-#define setResourceIndexOffset(rezIdx) \ - do { \ - bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer(); \ - if (_applyToStage[stage] || isUsingMtlArgBuff) { \ - bindIdxs.rezIdx = isUsingMtlArgBuff ? dslCnts.resourceIndex : dslCnts.rezIdx; \ - dslCnts.rezIdx += isUsingMtlArgBuff ? 0 : descCnt; \ - bindIdxs.resourceIndex = dslCnts.resourceIndex; \ - dslCnts.resourceIndex += descCnt; \ - } \ - } while(false) + // Can be used more than once for combined multi-resource descriptor types. + // When using Metal argument buffers, we accumulate the resource indexes cumulatively, across all resource types. +#define setResourceIndexOffset(rezIdx) \ +if (isUsingMtlArgBuff) { \ + bindIdxs.rezIdx = _layout->_mtlResourceCount + (descCnt * descIdxOfst++); \ +} else if (_applyToStage[stage]) { \ + bindIdxs.rezIdx = dslCnts.rezIdx; \ + dslCnts.rezIdx += descCnt; \ +} + bool isUsingMtlArgBuff = _layout->isUsingMetalArgumentBuffers(); auto& mtlFeats = getMetalFeatures(); MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage]; MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage]; - uint32_t descCnt = pBinding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ? 1 : pBinding->descriptorCount; + uint32_t descIdxOfst = 0; // Incremented in setResourceIndexOffset() when it is called more than once per desc type. 
+ uint32_t descCnt = getDescriptorCount(); switch (pBinding->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: setResourceIndexOffset(samplerIndex); @@ -705,7 +714,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: setResourceIndexOffset(textureIndex); - if (!getMetalFeatures().nativeTextureAtomics) setResourceIndexOffset(bufferIndex); + if (!getMetalFeatures().nativeTextureAtomics) { setResourceIndexOffset(bufferIndex); } if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfTextures) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName())); @@ -731,6 +740,24 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } } +bool MVKDescriptorSetLayoutBinding::needsBuffSizeAuxBuffer() { + + if ( !_layout->isUsingMetalArgumentBuffers() ) { return false; } + if ( getDescriptorCount() == 0 ) { return false; } + + switch (getDescriptorType()) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: + return true; + + default: + return false; + } +} + #pragma mark - #pragma mark MVKDescriptor @@ -760,6 +787,12 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s #pragma mark - #pragma mark MVKBufferDescriptor +uint32_t MVKBufferDescriptor::getBufferSize(VkDeviceSize dynamicOffset) { + return uint32_t((_buffRange == VK_WHOLE_SIZE + ? 
_mvkBuffer->getByteCount() - (_mvkBuffer->getMTLBufferOffset() + _buffOffset + dynamicOffset) + : _buffRange)); +} + // A null cmdEncoder can be passed to perform a validation pass void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder, VkPipelineBindPoint pipelineBindPoint, @@ -775,10 +808,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (_mvkBuffer) { bb.mtlBuffer = _mvkBuffer->getMTLBuffer(); bb.offset = _mvkBuffer->getMTLBufferOffset() + _buffOffset + bufferDynamicOffset; - if (_buffRange == VK_WHOLE_SIZE) - bb.size = (uint32_t)(_mvkBuffer->getByteCount() - bb.offset); - else - bb.size = (uint32_t)_buffRange; + bb.size = getBufferSize(bufferDynamicOffset); } for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) { if (stages[i]) { @@ -805,12 +835,13 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (oldBuff) { oldBuff->release(); } // Write resource to Metal argument buffer - if (mvkDescSet->isUsingMetalArgumentBuffer()) { + if (mvkDescSet->hasMetalArgumentBuffer()) { auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + dstIdx; mvkArgBuff.setBuffer(_mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil, _mvkBuffer ? 
_mvkBuffer->getMTLBufferOffset() + _buffOffset : 0, argIdx); + mvkDescSet->setBufferSize(argIdx, getBufferSize()); } } @@ -872,14 +903,10 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind, MVKDescriptorSet* mvkDescSet, - uint32_t dstIdx, + uint32_t dstOffset, uint32_t srcIdx, size_t srcStride, const void* pData) { - - // For inline buffers the dstIdx is actually a dstOffset - uint32_t dstOffset = dstIdx; - // Ensure there is a destination to write to uint32_t buffSize = mvkDSLBind->_info.descriptorCount; if ( !_mvkMTLBufferAllocation ) { _mvkMTLBufferAllocation = mvkDescSet->acquireMTLBufferRegion(buffSize); } @@ -892,7 +919,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s } // Write resource to Metal argument buffer - if (mvkDescSet->isUsingMetalArgumentBuffer()) { + if (mvkDescSet->hasMetalArgumentBuffer()) { auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex; mvkArgBuff.setBuffer(_mvkMTLBufferAllocation ? 
_mvkMTLBufferAllocation->_mtlBuffer : nil, @@ -990,7 +1017,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (oldImgView) { oldImgView->release(); } // Write resource to Metal argument buffer - if (mvkDescSet->isUsingMetalArgumentBuffer()) { + if (mvkDescSet->hasMetalArgumentBuffer()) { auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); VkDescriptorType descType = getDescriptorType(); @@ -1103,7 +1130,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (oldSamp) { oldSamp->release(); } // Write resource to Metal argument buffer - if (mvkDescSet->isUsingMetalArgumentBuffer()) { + if (mvkDescSet->hasMetalArgumentBuffer()) { auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); MVKSampler* imutSamp = mvkDSLBind->getImmutableSampler(dstIdx); MVKSampler* mvkSamp = imutSamp ? imutSamp : _mvkSampler; @@ -1272,7 +1299,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s if (oldBuffView) { oldBuffView->release(); } // Write resource to Metal argument buffer - if (mvkDescSet->isUsingMetalArgumentBuffer()) { + if (mvkDescSet->hasMetalArgumentBuffer()) { auto& mvkArgBuff = mvkDescSet->getMetalArgumentBuffer(); VkDescriptorType descType = getDescriptorType(); id mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index 1aa48dac9..3ab8dc986 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -111,11 +111,8 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { /** Returns the binding at the index in a descriptor set layout. */ MVKDescriptorSetLayoutBinding* getBindingAt(uint32_t index) { return &_bindings[index]; } - /** Returns true if this layout is for push descriptors only. 
*/ - bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; } - - /** Returns true if this layout is using a Metal argument buffer. */ - bool isUsingMetalArgumentBuffer() { return isUsingDescriptorSetMetalArgumentBuffers() && !isPushDescriptorLayout(); }; + /** Overridden because descriptor sets may be marked as discrete and not use an argument buffer. */ + bool isUsingMetalArgumentBuffers() override; MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo); @@ -133,6 +130,7 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); } MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; } const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo); + uint32_t getBufferSizeBufferArgBuferIndex() { return _mtlResourceCount; } std::string getLogDescription(); MVKSmallVector _bindings; @@ -141,6 +139,8 @@ class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject { NSArray* _mtlArgumentEncoderArgs = nil; uint64_t _mtlArgumentBufferEncodedSize = 0; uint32_t _descriptorCount = 0; + uint32_t _mtlResourceCount = 0; + int32_t _maxBufferIndex = -1; bool _isPushDescriptorLayout = false; }; @@ -179,9 +179,6 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { VkBufferView* pTexelBufferView, VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock); - /** Returns an MTLBuffer region allocation. */ - MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length); - /** Returns the descriptor at an index. 
*/ MVKDescriptor* getDescriptorAt(uint32_t descIndex) { return _descriptors[descIndex]; } @@ -192,16 +189,21 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { uint32_t getDynamicOffsetDescriptorCount() { return _dynamicOffsetDescriptorCount; } /** Returns true if this descriptor set is using a Metal argument buffer. */ - bool isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); }; + bool hasMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffers(); }; /** Returns the argument buffer helper object used by this descriptor set. */ MVKMetalArgumentBuffer& getMetalArgumentBuffer() { return _argumentBuffer; } + /** Encode the buffer sizes auxiliary buffer to the GPU. */ + void encodeAuxBufferUsage(MVKResourcesCommandEncoderState* rezEncState, MVKShaderStage stage); + MVKDescriptorSet(MVKDescriptorPool* pool); protected: friend class MVKDescriptorSetLayoutBinding; friend class MVKDescriptorPool; + friend class MVKBufferDescriptor; + friend class MVKInlineUniformBlockDescriptor; void propagateDebugName() override {} MVKDescriptor* getDescriptor(uint32_t binding, uint32_t elementIndex = 0); @@ -209,11 +211,14 @@ class MVKDescriptorSet : public MVKVulkanAPIDeviceObject { uint32_t variableDescriptorCount, NSUInteger mtlArgBufferOffset); void free(bool isPoolReset); + MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length); + void setBufferSize(uint32_t descIdx, uint32_t value); MVKDescriptorPool* _pool; MVKDescriptorSetLayout* _layout; MVKSmallVector _descriptors; MVKMetalArgumentBuffer _argumentBuffer; + MVKMTLBufferAllocation* _bufferSizesBuffer = nullptr; uint32_t _dynamicOffsetDescriptorCount; uint32_t _variableDescriptorCount; }; @@ -292,7 +297,7 @@ class MVKDescriptorPool : public MVKVulkanAPIDeviceObject { MVKBitArray _descriptorSetAvailablility; id _metalArgumentBuffer; NSUInteger _nextMetalArgumentBufferOffset; - MVKMTLBufferAllocator _inlineBlockMTLBufferAllocator; + MVKMTLBufferAllocator _mtlBufferAllocator; 
MVKDescriptorTypePool _uniformBufferDescriptors; MVKDescriptorTypePool _storageBufferDescriptors; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index 6ff53df29..9dbba6e3e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -29,8 +29,8 @@ #pragma mark MVKMetalArgumentBuffer void MVKMetalArgumentBuffer::setArgumentBuffer(id mtlArgBuff, - NSUInteger mtlArgBuffOfst, - id mtlArgEnc) { + NSUInteger mtlArgBuffOfst, + id mtlArgEnc) { _mtlArgumentBuffer = mtlArgBuff; _mtlArgumentBufferOffset = mtlArgBuffOfst; @@ -90,7 +90,7 @@ if (cmdEncoder) { cmdEncoder->bindDescriptorSet(pipelineBindPoint, descSetIndex, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex); } - if ( !isUsingMetalArgumentBuffer() ) { + if ( !isUsingMetalArgumentBuffers() ) { for (auto& dslBind : _bindings) { dslBind.bind(cmdEncoder, pipelineBindPoint, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex); } @@ -222,6 +222,24 @@ } } +static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConfig, + MVKShaderStageResourceBinding buffBinding, + uint32_t descSetIndex, + uint32_t descBinding, + bool usingNativeTextureAtomics) { + for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) { + mvkPopulateShaderConversionConfig(shaderConfig, + buffBinding, + MVKShaderStage(stage), + descSetIndex, + descBinding, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + nullptr, + usingNativeTextureAtomics); + } +} + void MVKDescriptorSetLayout::populateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& shaderConfig, MVKShaderResourceBinding& dslMTLRezIdxOffsets, uint32_t descSetIndex) { @@ -230,8 +248,18 @@ _bindings[bindIdx].populateShaderConversionConfig(shaderConfig, dslMTLRezIdxOffsets, descSetIndex); } - // Mark if Metal argument buffers are in use, but this descriptor set layout is not using them. 
- if (isUsingDescriptorSetMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) { + // If this descriptor set is using an argument buffer, add the buffer size auxiliary buffer. + if (isUsingMetalArgumentBuffers()) { + MVKShaderStageResourceBinding buffBinding; + buffBinding.bufferIndex = getBufferSizeBufferArgBuferIndex(); + populateAuxBuffer(shaderConfig, buffBinding, descSetIndex, + MVK_spirv_cross::kBufferSizeBufferBinding, + getMetalFeatures().nativeTextureAtomics); + } + + // If the app is using argument buffers, but this descriptor set is + // not, because this is a discrete descriptor set, mark it as such. + if(MVKDeviceTrackingMixin::isUsingMetalArgumentBuffers() && !isUsingMetalArgumentBuffers()) { shaderConfig.discreteDescriptorSets.push_back(descSetIndex); } } @@ -261,13 +289,26 @@ return descSetIsUsed; } +bool MVKDescriptorSetLayout::isUsingMetalArgumentBuffers() { + return MVKDeviceTrackingMixin::isUsingMetalArgumentBuffers() && !_isPushDescriptorLayout; +}; + +// Returns an autoreleased MTLArgumentDescriptor suitable for adding an auxiliary buffer to the argument buffer. +static MTLArgumentDescriptor* getAuxBufferArgumentDescriptor(uint32_t argIndex) { + auto* argDesc = [MTLArgumentDescriptor argumentDescriptor]; + argDesc.dataType = MTLDataTypePointer; + argDesc.access = MTLArgumentAccessReadWrite; + argDesc.index = argIndex; + argDesc.arrayLength = 1; + return argDesc; +} + MVKDescriptorSetLayout::MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { - uint32_t bindCnt = pCreateInfo->bindingCount; const auto* pBindingFlags = getBindingFlags(pCreateInfo); - // The bindings in VkDescriptorSetLayoutCreateInfo do not need to provided in order of binding number. + // The bindings in VkDescriptorSetLayoutCreateInfo do not need to be provided in order of binding number. 
// However, several subsequent operations, such as the dynamic offsets in vkCmdBindDescriptorSets() // are ordered by binding number. To prepare for this, sort the bindings by binding number. struct BindInfo { @@ -288,24 +329,37 @@ _bindings.reserve(bindCnt); for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) { BindInfo& bindInfo = sortedBindings[bindIdx]; - _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags, _descriptorCount); + _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags); _bindingToIndex[bindInfo.pBinding->binding] = bindIdx; - _descriptorCount += _bindings.back().getDescriptorCount(); } - if (isUsingMetalArgumentBuffer()) { + if (isUsingMetalArgumentBuffers()) { + bool needsBuffSizeAuxBuff = false; if (needsMetalArgumentBufferEncoders()) { @autoreleasepool { auto* mutableArgs = [[NSMutableArray alloc] initWithCapacity: _bindings.size()]; for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(mutableArgs); + needsBuffSizeAuxBuff = needsBuffSizeAuxBuff || dslBind.needsBuffSizeAuxBuffer(); + } + + // Possibly add buffer sizes auxiliary buffer. + if (needsBuffSizeAuxBuff) { + [mutableArgs addObject: getAuxBufferArgumentDescriptor(getBufferSizeBufferArgBuferIndex())]; } + _mtlArgumentEncoderArgs = mutableArgs; // retained _mtlArgumentBufferEncodedSize = [[getMTLDevice() newArgumentEncoderWithArguments: _mtlArgumentEncoderArgs] autorelease].encodedLength; } } else { for (auto& dslBind : _bindings) { - _mtlArgumentBufferEncodedSize += dslBind.getMetalArgumentBufferEncodedSize(); + _mtlArgumentBufferEncodedSize += dslBind.getMetal3ArgumentBufferEncodedSize(); + needsBuffSizeAuxBuff = needsBuffSizeAuxBuff || dslBind.needsBuffSizeAuxBuffer(); + } + + // Possibly add buffer sizes auxiliary buffer. 
+ if (needsBuffSizeAuxBuff) { + _mtlArgumentBufferEncodedSize += kMVKMetal3ArgBuffSlotSizeInBytes; } } } @@ -330,14 +384,12 @@ } std::string MVKDescriptorSetLayout::getLogDescription() { - std::stringstream logMsgOut; - logMsgOut << "VkDescriptorSetLayout with " << _bindings.size() << " descriptors:"; + std::stringstream descStr; + descStr << "VkDescriptorSetLayout " << this << " with " << _bindings.size() << " bindings:"; for (auto& dlb : _bindings) { - logMsgOut << "\n\t\t" << dlb.getDescriptorIndex() << ": "; - logMsgOut << mvkVkDescriptorTypeName(dlb.getDescriptorType()); - logMsgOut << " with " << dlb.getDescriptorCount() << " bindings."; + descStr << "\n\t" << dlb.getLogDescription(); } - return logMsgOut.str(); + return descStr.str(); } MVKDescriptorSetLayout::~MVKDescriptorSetLayout() { @@ -405,7 +457,7 @@ } MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) { - return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length); + return _pool->_mtlBufferAllocator.acquireMTLBufferRegion(length); } VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout, @@ -415,7 +467,7 @@ _variableDescriptorCount = variableDescriptorCount; id mtlArgEnc = nil; - if (isUsingMetalArgumentBuffer() && needsMetalArgumentBufferEncoders()) { + if (hasMetalArgumentBuffer() && needsMetalArgumentBufferEncoders()) { mtlArgEnc = [getMTLDevice() newArgumentEncoderWithArguments: layout->_mtlArgumentEncoderArgs]; // temp retain } _argumentBuffer.setArgumentBuffer(_pool->_metalArgumentBuffer, mtlArgBufferOffset, mtlArgEnc); @@ -439,6 +491,16 @@ mvkDSLBind->encodeImmutableSamplersToMetalArgumentBuffer(this); } + // If needed, allocate a MTLBuffer to track buffer sizes, and add it to the argument buffer. + // If this desc set doesn't contain buffers, buffSizesSlotCount will be zero, since _maxBufferIndex starts at -1. 
+ uint32_t buffSizesSlotCount = _layout->_maxBufferIndex + 1; + if (buffSizesSlotCount) { + _bufferSizesBuffer = acquireMTLBufferRegion(buffSizesSlotCount * sizeof(uint32_t)); + _argumentBuffer.setBuffer(_bufferSizesBuffer->_mtlBuffer, + _bufferSizesBuffer->_offset, + _layout->getBufferSizeBufferArgBuferIndex()); + } + return getConfigurationResult(); } @@ -456,9 +518,27 @@ _descriptors.clear(); _descriptors.shrink_to_fit(); + if (_bufferSizesBuffer) { + _bufferSizesBuffer->returnToPool(); + _bufferSizesBuffer = nullptr; + } + clearConfigurationResult(); } +void MVKDescriptorSet::setBufferSize(uint32_t descIdx, uint32_t value) { + if (_bufferSizesBuffer) { + *(uint32_t*)((uintptr_t)_bufferSizesBuffer->getContents() + (descIdx * sizeof(uint32_t))) = value; + } +} + +void MVKDescriptorSet::encodeAuxBufferUsage(MVKResourcesCommandEncoderState* rezEncState, MVKShaderStage stage) { + if (_bufferSizesBuffer) { + MTLRenderStages mtlRendStages = MTLRenderStageVertex | MTLRenderStageFragment; + rezEncState->encodeResourceUsage(stage, _bufferSizesBuffer->_mtlBuffer, MTLResourceUsageRead, mtlRendStages); + } +} + MVKDescriptorSet::MVKDescriptorSet(MVKDescriptorPool* pool) : MVKVulkanAPIDeviceObject(pool->_device), _pool(pool) { free(true); } @@ -521,7 +601,7 @@ const auto* pVarDescCounts = getVariableDecriptorCounts(pAllocateInfo); for (uint32_t dsIdx = 0; dsIdx < pAllocateInfo->descriptorSetCount; dsIdx++) { MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pAllocateInfo->pSetLayouts[dsIdx]; - if ( !mvkDSL->isPushDescriptorLayout() ) { + if ( !mvkDSL->_isPushDescriptorLayout ) { rslt = allocateDescriptorSet(mvkDSL, (pVarDescCounts ? pVarDescCounts[dsIdx] : 0), &pDescriptorSets[dsIdx]); if (rslt) { return rslt; } } @@ -562,7 +642,7 @@ // will fit in the slot that might already have been allocated for it in the Metal argument // buffer from a previous allocation that was returned. 
If this pool has been reset recently, // then the desc sets will not have had a Metal argument buffer allocation assigned yet. - if (mvkDSL->isUsingMetalArgumentBuffer()) { + if (mvkDSL->isUsingMetalArgumentBuffers()) { // If the offset has not been set (and it's not the first desc set except // on a reset pool), set the offset and update the next available offset value. @@ -774,7 +854,7 @@ _hasPooledDescriptors(getMVKConfig().preallocateDescriptors), // Set this first! Accessed by MVKDescriptorSet constructor and getPoolSize() in following lines. _descriptorSets(pCreateInfo->maxSets, MVKDescriptorSet(this)), _descriptorSetAvailablility(pCreateInfo->maxSets, true), - _inlineBlockMTLBufferAllocator(device, getMetalFeatures().dynamicMTLBufferSize, true), + _mtlBufferAllocator(_device, getMetalFeatures().maxMTLBufferSize, true), _uniformBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)), _storageBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)), _uniformBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)), @@ -794,7 +874,7 @@ _metalArgumentBuffer = nil; _nextMetalArgumentBufferOffset = 0; - if ( !isUsingDescriptorSetMetalArgumentBuffers() ) { return; } + if ( !isUsingMetalArgumentBuffers() ) { return; } auto& mtlFeats = getMetalFeatures(); @autoreleasepool { @@ -854,6 +934,10 @@ } } + // To support the SPIR-V OpArrayLength operation, for each descriptor set that + // contain buffers, we add an additional buffer at the end to track buffer sizes. + mtlBuffCnt += std::min(mtlBuffCnt, pCreateInfo->maxSets); + // Each descriptor set uses a separate Metal argument buffer, but all of these descriptor set // Metal argument buffers share a single MTLBuffer. This single MTLBuffer needs to be large // enough to hold all of the encoded resources for the descriptors. 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 74f387cf7..bab0818da 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -352,11 +352,6 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { /** Returns whether the MSL version is supported on this device. */ bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; } - /** Returns whether this physical device supports using Metal argument buffers for descriptor sets. */ - bool supportsDescriptorSetMetalArgumentBuffers() { - return _metalFeatures.descriptorSetArgumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; - }; - /** Returns the MTLStorageMode that matches the Vulkan memory property flags. */ MTLStorageMode getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags); @@ -443,6 +438,7 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { uint32_t _lazilyAllocatedMemoryTypes; bool _hasUnifiedMemory = true; bool _isAppleGPU = true; + bool _isUsingMetalArgumentBuffers = false; }; @@ -869,7 +865,6 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { int _capturePipeFileDesc = -1; bool _isPerformanceTracking = false; bool _isCurrentlyAutoGPUCapturing = false; - bool _isUsingDescriptorSetMetalArgumentBuffers = false; }; @@ -904,7 +899,7 @@ class MVKDeviceTrackingMixin { bool isAppleGPU() { return _device->_physicalDevice->_isAppleGPU; } /** Returns whether this device is using one Metal argument buffer for each descriptor set, on multiple pipeline and pipeline stages. 
*/ - bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingDescriptorSetMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; }; + virtual bool isUsingMetalArgumentBuffers() { return _device->_physicalDevice->_isUsingMetalArgumentBuffers; }; /** Returns whether this device needs Metal argument buffer encoders to populate argument buffer content. */ bool needsMetalArgumentBufferEncoders() { return _device->_physicalDevice->_metalFeatures.needsArgumentBufferEncoders; }; @@ -962,6 +957,8 @@ class MVKDeviceTrackingMixin { /** Constructs an instance for the specified device. */ MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } + virtual ~MVKDeviceTrackingMixin() {} + protected: MVKDevice* _device; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 8adcf84b5..72339bdd3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -524,7 +524,7 @@ uint32_t uintMax = std::numeric_limits::max(); uint32_t maxSamplerCnt = getMaxSamplerCount(); - bool isTier2 = supportsDescriptorSetMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); + bool isTier2 = _isUsingMetalArgumentBuffers && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); // Create a SSOT for these Vulkan 1.1 properties, which can be queried via two mechanisms here. VkPhysicalDeviceVulkan11Properties supportedProps11; @@ -576,19 +576,19 @@ supportedProps12.robustBufferAccessUpdateAfterBind = _features.robustBufferAccess; supportedProps12.quadDivergentImplicitLod = false; supportedProps12.maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? maxSamplerCnt : _properties.limits.maxPerStageDescriptorSamplers; - supportedProps12.maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 
500000 : _properties.limits.maxPerStageDescriptorUniformBuffers; - supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers; - supportedProps12.maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages; - supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages; + supportedProps12.maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 1e6 : _properties.limits.maxPerStageDescriptorUniformBuffers; + supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 1e6 : _properties.limits.maxPerStageDescriptorStorageBuffers; + supportedProps12.maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 1e6 : _properties.limits.maxPerStageDescriptorSampledImages; + supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 1e6 : _properties.limits.maxPerStageDescriptorStorageImages; supportedProps12.maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments; - supportedProps12.maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources; + supportedProps12.maxPerStageUpdateAfterBindResources = isTier2 ? 1e6 : _properties.limits.maxPerStageResources; supportedProps12.maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? maxSamplerCnt : _properties.limits.maxDescriptorSetSamplers; - supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers; - supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic; - supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 
500000 : _properties.limits.maxDescriptorSetStorageBuffers; - supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic; - supportedProps12.maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages; - supportedProps12.maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages; + supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetUniformBuffers; + supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetUniformBuffersDynamic; + supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetStorageBuffers; + supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetStorageBuffersDynamic; + supportedProps12.maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetSampledImages; + supportedProps12.maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 1e6 : _properties.limits.maxDescriptorSetStorageImages; supportedProps12.maxDescriptorSetUpdateAfterBindInputAttachments = _properties.limits.maxDescriptorSetInputAttachments; supportedProps12.supportedDepthResolveModes = (_metalFeatures.depthResolve ? 
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT @@ -2407,6 +2407,8 @@ supportsMTLGPUFamily(Metal3) && _metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2)); + _isUsingMetalArgumentBuffers = _metalFeatures.descriptorSetArgumentBuffers && getMVKConfig().useMetalArgumentBuffers;; + #define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \ if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \ [_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \ @@ -3069,7 +3071,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // Next 4 bytes contains flags based on enabled Metal features that // might affect the contents of the pipeline cache (mostly MSL content). uint32_t mtlFeatures = 0; - mtlFeatures |= supportsDescriptorSetMetalArgumentBuffers() << 0; + mtlFeatures |= _isUsingMetalArgumentBuffers << 0; *(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures); uuidComponentOffset += sizeof(mtlFeatures); } @@ -3302,11 +3304,11 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // objects that can be created within the app. When not using argument buffers, no such // limit is imposed. This has been verified with testing up to 1M MTLSamplerStates. uint32_t MVKPhysicalDevice::getMaxSamplerCount() { - if (supportsDescriptorSetMetalArgumentBuffers()) { + if (_isUsingMetalArgumentBuffers) { return ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)] ? 
(uint32_t)_mtlDevice.maxArgumentBufferSamplerCount : 1024); } else { - return kMVKUndefinedLargeUInt32; + return 1e6; } } @@ -3474,16 +3476,17 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope void MVKPhysicalDevice::logGPUInfo() { string logMsg = "GPU device:"; - logMsg += "\n\t\tmodel: %s"; - logMsg += "\n\t\ttype: %s"; - logMsg += "\n\t\tvendorID: %#06x"; - logMsg += "\n\t\tdeviceID: %#06x"; - logMsg += "\n\t\tpipelineCacheUUID: %s"; - logMsg += "\n\t\tGPU memory available: %llu MB"; - logMsg += "\n\t\tGPU memory used: %llu MB"; - logMsg += "\n\tsupports the following Metal Versions, GPU's and Feature Sets:"; - logMsg += "\n\t\tMetal Shading Language %s"; - + logMsg += "\n\tmodel: %s"; + logMsg += "\n\ttype: %s"; + logMsg += "\n\tvendorID: %#06x"; + logMsg += "\n\tdeviceID: %#06x"; + logMsg += "\n\tpipelineCacheUUID: %s"; + logMsg += "\n\tGPU memory available: %llu MB"; + logMsg += "\n\tGPU memory used: %llu MB"; + logMsg += "\n\tMetal Shading Language %s"; + logMsg += "\n\tsupports the following GPU Features:"; + + if (supportsMTLGPUFamily(Metal3)) { logMsg += "\n\t\tGPU Family Metal 3"; } #if MVK_XCODE_15 && (MVK_IOS || MVK_MACOS) if (supportsMTLGPUFamily(Apple9)) { logMsg += "\n\t\tGPU Family Apple 9"; } #endif @@ -4677,7 +4680,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope if ( !_defaultMTLSamplerState ) { @autoreleasepool { MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease]; - mtlSampDesc.supportArgumentBuffers = _isUsingDescriptorSetMetalArgumentBuffers; + mtlSampDesc.supportArgumentBuffers = _physicalDevice->_isUsingMetalArgumentBuffers; _defaultMTLSamplerState = [_physicalDevice->_mtlDevice newSamplerStateWithDescriptor: mtlSampDesc]; // retained } } @@ -4873,14 +4876,9 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } #endif - // After enableExtensions && enableFeatures - // Use Metal arg buffs if available, and config 
wants them either always, - // or with descriptor indexing, and descriptor indexing has been enabled. - _isUsingDescriptorSetMetalArgumentBuffers = (_physicalDevice->supportsDescriptorSetMetalArgumentBuffers() && - (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS || - (getMVKConfig().useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING && - (_enabledVulkan12FeaturesNoExt.descriptorIndexing || _enabledExtensions.vk_EXT_descriptor_indexing.enabled)))); - MVKLogInfoIf(getMVKConfig().debugMode, "Descriptor sets binding resources using %s.", _isUsingDescriptorSetMetalArgumentBuffers ? "Metal argument buffers" : "discrete resource indexes"); + MVKLogInfoIf(getMVKConfig().debugMode, "Descriptor sets binding resources using %s.", + _physicalDevice->_isUsingMetalArgumentBuffers ? (_physicalDevice->_metalFeatures.needsArgumentBufferEncoders + ? "Metal argument buffers" : "Metal3 argument buffers") : "discrete resource indexes"); _commandResourceFactory = new MVKCommandResourceFactory(this); @@ -4911,7 +4909,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s", - getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); + getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t", true).c_str()); } // Perf stats that last the duration of the app process. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 6b52b5ce5..fa48391d9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -2458,7 +2458,7 @@ static MSLSamplerYCbCrRange getSpvSamplerYcbcrRangeFromVkSamplerYcbcrRange(VkSam ? 
mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, getDeviceProperties().limits.maxSamplerAnisotropy) : 1); mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates; - mtlSampDesc.supportArgumentBuffers = isUsingDescriptorSetMetalArgumentBuffers(); + mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers(); // If compareEnable is true, but dynamic samplers with depth compare are not available // on this device, this sampler must only be used as an immutable sampler, and will diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index f4f2abb9a..b7f31d868 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -323,7 +323,7 @@ MVKLogInfo("Created VkInstance for Vulkan version %s, as requested by app, with the following %d Vulkan extensions enabled:%s", mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), _enabledExtensions.getEnabledCount(), - _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); + _enabledExtensions.enabledNamesString("\n\t", true).c_str()); _useCreationCallbacks = false; } @@ -783,7 +783,7 @@ MVK_VERSION_STRING, mvkGetVulkanVersionString(getMVKConfig().apiVersionToAdvertise).c_str(), allExtns.getEnabledCount(), - allExtns.enabledNamesString("\n\t\t", true).c_str()); + allExtns.enabledNamesString("\n\t", true).c_str()); } VkResult MVKInstance::verifyLayers(uint32_t count, const char* const* names) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index d20b7b8de..97b91b993 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -76,18 +76,6 @@ class MVKPipelineLayout : public MVKVulkanAPIDeviceObject { /** Populates the specified shader conversion config. */ void populateShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig); - /** Returns the number of textures in this layout. 
This is used to calculate the size of the swizzle buffer. */ - uint32_t getTextureCount() { return _mtlResourceCounts.getMaxTextureIndex(); } - - /** Returns the number of buffers in this layout. This is used to calculate the size of the buffer size buffer. */ - uint32_t getBufferCount() { return _mtlResourceCounts.getMaxBufferIndex(); } - - /** Returns the number of descriptor sets in this pipeline layout. */ - uint32_t getDescriptorSetCount() { return (uint32_t)_descriptorSetLayouts.size(); } - - /** Returns the number of descriptors in the descriptor set layout. */ - uint32_t getDescriptorCount(uint32_t descSetIndex) { return getDescriptorSetLayout(descSetIndex)->getDescriptorCount(); } - /** Returns the descriptor set layout. */ MVKDescriptorSetLayout* getDescriptorSetLayout(uint32_t descSetIndex) { return _descriptorSetLayouts[descSetIndex]; } @@ -101,6 +89,7 @@ class MVKPipelineLayout : public MVKVulkanAPIDeviceObject { void propagateDebugName() override {} bool stageUsesPushConstants(MVKShaderStage mvkStage); + std::string getLogDescription(); MVKSmallVector _descriptorSetLayouts; MVKSmallVector _dslMTLResourceIndexOffsets; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index a4cc91e00..2e5165828 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -110,7 +110,7 @@ } // Add resource bindings defined in the descriptor set layouts - uint32_t dslCnt = getDescriptorSetCount(); + auto dslCnt = _descriptorSetLayouts.size(); for (uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) { _descriptorSetLayouts[dslIdx]->populateShaderConversionConfig(shaderConfig, _dslMTLResourceIndexOffsets[dslIdx], @@ -128,6 +128,16 @@ return false; } +std::string MVKPipelineLayout::getLogDescription() { + std::stringstream descStr; + size_t dslCnt = _descriptorSetLayouts.size(); + descStr << "VkPipelineLayout " << this << " with " << dslCnt << " descriptor set layouts:"; + for 
(uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) { + descStr << "\n\t" << dslIdx << ": " << _descriptorSetLayouts[dslIdx]; + } + return descStr.str(); +} + MVKPipelineLayout::MVKPipelineLayout(MVKDevice* device, const VkPipelineLayoutCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { @@ -139,7 +149,7 @@ // If we are using Metal argument buffers, consume a fixed number // of buffer indexes for the Metal argument buffers themselves. - if (isUsingDescriptorSetMetalArgumentBuffers()) { + if (isUsingMetalArgumentBuffers()) { _mtlResourceCounts.addArgumentBuffers(kMVKMaxDescriptorSetCount); } @@ -168,13 +178,15 @@ MVKShaderResourceBinding adjstdDSLRezOfsts = _mtlResourceCounts; MVKShaderResourceBinding adjstdDSLRezCnts = pDescSetLayout->_mtlResourceCounts; - if (pDescSetLayout->isUsingMetalArgumentBuffer()) { + if (pDescSetLayout->isUsingMetalArgumentBuffers()) { adjstdDSLRezOfsts.clearArgumentBufferResources(); adjstdDSLRezCnts.clearArgumentBufferResources(); } _dslMTLResourceIndexOffsets.push_back(adjstdDSLRezOfsts); _mtlResourceCounts += adjstdDSLRezCnts; } + + MVKLogDebugIf(getMVKConfig().debugMode, "Created %s\n", getLogDescription().c_str()); } MVKPipelineLayout::~MVKPipelineLayout() { @@ -200,7 +212,7 @@ const CreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, MVKShaderStage stage) { - if (isUsingDescriptorSetMetalArgumentBuffers()) { + if (isUsingMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) { auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx); dsLayout->populateBindingUse(getDescriptorBindingUse(dsIdx, stage), shaderConfig, stage, dsIdx); @@ -213,7 +225,7 @@ MVKVulkanAPIDeviceObject(device), _pipelineCache(pipelineCache), _flags(flags), - _descriptorSetCount(layout->getDescriptorSetCount()), + _descriptorSetCount(uint32_t(layout->_descriptorSetLayouts.size())), _fullImageViewSwizzle(getMVKConfig().fullImageViewSwizzle) { // Establish descriptor 
counts and push constants use. @@ -690,7 +702,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { pipelineStart = mvkGetTimestamp(); } - if (isUsingDescriptorSetMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } + if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } const char* dumpDir = getMVKConfig().shaderDumpDir; if (dumpDir && *dumpDir) { @@ -1737,9 +1749,9 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this); shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers; - bool useMetalArgBuff = isUsingDescriptorSetMetalArgumentBuffers(); + bool useMetalArgBuff = isUsingMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; - shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; + shaderConfig.options.mslOptions.force_active_argument_buffer_resources = false; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = mtlFeats.needsCubeGradWorkaround; @@ -2082,7 +2094,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT); - if (isUsingDescriptorSetMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } + if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } const VkPipelineCreationFeedbackCreateInfo* pFeedbackInfo = nullptr; for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { @@ -2181,9 +2193,9 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 
shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D; shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize; - bool useMetalArgBuff = isUsingDescriptorSetMetalArgumentBuffers(); + bool useMetalArgBuff = isUsingMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; - shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; + shaderConfig.options.mslOptions.force_active_argument_buffer_resources = false; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; #if MVK_MACOS diff --git a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def index 9c7f29139..0648ea04a 100644 --- a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def +++ b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def @@ -78,7 +78,7 @@ MVK_CONFIG_MEMBER(useMTLHeap, VkBool32, MVK_CONFIG_MEMBER(apiVersionToAdvertise, uint32_t, API_VERSION_TO_ADVERTISE) MVK_CONFIG_MEMBER(advertiseExtensions, uint32_t, ADVERTISE_EXTENSIONS) MVK_CONFIG_MEMBER(resumeLostDevice, VkBool32, RESUME_LOST_DEVICE) -MVK_CONFIG_MEMBER(useMetalArgumentBuffers, MVKUseMetalArgumentBuffers, USE_METAL_ARGUMENT_BUFFERS) +MVK_CONFIG_MEMBER(useMetalArgumentBuffers, VkBool32, USE_METAL_ARGUMENT_BUFFERS) MVK_CONFIG_MEMBER(shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm, SHADER_COMPRESSION_ALGORITHM) MVK_CONFIG_MEMBER(shouldMaximizeConcurrentCompilation, VkBool32, SHOULD_MAXIMIZE_CONCURRENT_COMPILATION) MVK_CONFIG_MEMBER(timestampPeriodLowPassAlpha, float, TIMESTAMP_PERIOD_LOWPASS_ALPHA) diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index 2ae9742f5..10d511cbc 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -325,7 +325,7 @@ void 
mvkSetConfig(MVKConfiguration& dstMVKConfig, const MVKConfiguration& srcMVK /** Support Metal argument buffers. Enabled by default. */ #ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS -# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS +# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS 1 #endif /** Compress MSL shader source code in a pipeline cache. Defaults to no compression. */ diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp index bfb84e0b4..889461514 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp +++ b/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp @@ -219,7 +219,7 @@ bool MoltenVKShaderConverterTool::convertSPIRV(const vector& spv, mslContext.options.mslOptions.set_msl_version(_mslVersionMajor, _mslVersionMinor, _mslVersionPatch); mslContext.options.shouldFlipVertexY = _shouldFlipVertexY; mslContext.options.mslOptions.argument_buffers = _useMetalArgumentBuffers; - mslContext.options.mslOptions.force_active_argument_buffer_resources = _useMetalArgumentBuffers; + mslContext.options.mslOptions.force_active_argument_buffer_resources = false; mslContext.options.mslOptions.pad_argument_buffer_resources = false; mslContext.options.mslOptions.argument_buffers_tier = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::ArgumentBuffersTier::Tier2; mslContext.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); diff --git a/README.md b/README.md index 24ef17c16..50a0baa3a 100644 --- a/README.md +++ b/README.md @@ -291,8 +291,7 @@ as in the following examples: make MVK_CONFIG_LOG_LEVEL=0 or - - make macos MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 + make macos MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 ...etc. 
diff --git a/Scripts/runcts b/Scripts/runcts index 982ba2fbf..eb575271b 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -123,7 +123,7 @@ export MVK_CONFIG_DEBUG=0 export MVK_CONFIG_RESUME_LOST_DEVICE=1 export MVK_CONFIG_FAST_MATH_ENABLED=1 export MVK_CONFIG_FORCE_LOW_POWER_GPU=0 -export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 #(1 = Always, 2 = VK_EXT_descriptor_indexing enabled) +export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 From a3908bf5f943958530f1987628e3a87267c6b93d Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 1 Jul 2024 13:48:08 -0400 Subject: [PATCH 6/7] Update `Whats_New.md` for Metal3 argument buffers. --- Docs/Whats_New.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index ddfbd0e3b..bcd7bd323 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -18,10 +18,19 @@ MoltenVK 1.2.10 Released TBD +- Improvements to bindless resources and descriptor indexing: + - Add support for Metal3 argument buffers. + - Support argument buffers on all platforms, when Metal 3 is available. + - Support argument buffers on macOS when Metal3 is not available. + - Use Metal argument buffers by default when they are available. + - Revert MVKConfiguration::useMetalArgumentBuffers and env var + `MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS` to a boolean value, and enable it by default. + - Update max number of bindless buffers and textures per stage to 1M, per Apple Docs. - Add option to generate a GPU capture via a temporary named pipe from an external process. - Fix shader conversion failure when using native texture atomics. - MSL shader conversion, only pass resource bindings that apply to current shader stage. - Update documentation for minimum runtime OS requirements to indicate _macOS 10.15_, _iOS 13_, or _tvOS 13_. 
+- Update `MVK_PRIVATE_API_VERSION` to version `43`. - Update to latest SPIRV-Cross: - MSL: Add option to force depth write in fragment shaders - MSL: Improve handling of padded descriptors with argument buffers From bfb35bd319ba7f0e57437a82e8c190047a6ffcc3 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 2 Jul 2024 14:19:43 -0400 Subject: [PATCH 7/7] Fixes from code review of Metal3 argument buffers. --- Docs/Whats_New.md | 2 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index bcd7bd323..e3935fe56 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -30,7 +30,7 @@ Released TBD - Fix shader conversion failure when using native texture atomics. - MSL shader conversion, only pass resource bindings that apply to current shader stage. - Update documentation for minimum runtime OS requirements to indicate _macOS 10.15_, _iOS 13_, or _tvOS 13_. -- Update `MVK_PRIVATE_API_VERSION` to version `43`. +- Update `MVK_PRIVATE_API_VERSION` to version `42`. 
- Update to latest SPIRV-Cross: - MSL: Add option to force depth write in fragment shaders - MSL: Improve handling of padded descriptors with argument buffers diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 4e7e41790..f98e101d6 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 43 +#define MVK_PRIVATE_API_VERSION 42 #pragma mark - diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h index 5966b1ca9..3a475b932 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h @@ -399,7 +399,7 @@ class MVKInlineUniformBlockDescriptor : public MVKDescriptor { ~MVKInlineUniformBlockDescriptor() { reset(); } protected: - inline uint8_t* getData() { return _mvkMTLBufferAllocation ? (uint8_t*)_mvkMTLBufferAllocation->getContents() : nullptr; } + uint8_t* getData() { return _mvkMTLBufferAllocation ? (uint8_t*)_mvkMTLBufferAllocation->getContents() : nullptr; } MVKMTLBufferAllocation* _mvkMTLBufferAllocation = nullptr; };