From 190e993b418e0752deade53056c32153fc391072 Mon Sep 17 00:00:00 2001 From: Aitor Camacho Date: Wed, 5 Jun 2024 21:41:12 +0200 Subject: [PATCH] Apply SPIRV-Cross workarounds for Metal bugs 2 workarounds introduced: - Fragments with side effects will now execute following Vulkan - Depth/Stencil write postponed to post fragment execution when input attachment and depth/stencil attachment reference the same resource --- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 14 ++++++++++++++ MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h | 8 ++++++-- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 15 +++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index a4cdce53e..d67576695 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -456,6 +456,7 @@ class MVKGraphicsPipeline : public MVKPipeline { bool _isRasterizingColor = false; bool _sampleLocationsEnable = false; bool _isTessellationPipeline = false; + bool _inputAttachmentIsDSAttachment = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index e10f03d18..776545c75 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -507,6 +507,16 @@ _outputControlPointCount = reflectData.numControlPoints; mvkSetOrClear(&_tessInfo, _isTessellationPipeline ? pCreateInfo->pTessellationState : nullptr); + // Handles depth attachment being used as input attachment. However, it does not solve the issue when + // the pipeline is created without render pass (dynamic rendering) since we won't be able to know + // which resources will be used when rendering. Needs to be done before we do shaders + // Potential solution would be to generate 2 pipelines, one with the workaround for the Metal issue + // and one without it, and decide at bind time once we know the resources which one to use. + if (pCreateInfo->renderPass) { + MVKRenderSubpass* subpass = ((MVKRenderPass*)pCreateInfo->renderPass)->getSubpass(pCreateInfo->subpass); + _inputAttachmentIsDSAttachment = subpass->isInputAttachmentDepthStencilAttachment(); + } + // Render pipeline state. Do this as early as possible, to fail fast if pipeline requires a fail on cache-miss. initMTLRenderPipelineState(pCreateInfo, reflectData, pPipelineFB, pVertexSS, pVertexFB, pTessCtlSS, pTessCtlFB, pTessEvalSS, pTessEvalFB, pFragmentSS, pFragmentFB); if ( !_hasValidMTLPipelineStates ) { return; } @@ -1332,6 +1342,8 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.capture_output_to_buffer = false; shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize; shaderConfig.options.mslOptions.check_discarded_frag_stores = true; + shaderConfig.options.mslOptions.force_fragment_with_side_effects_execution = true; + shaderConfig.options.mslOptions.input_attachment_is_ds_attachment = _inputAttachmentIsDSAttachment; if (mtlFeats.needsSampleDrefLodArrayWorkaround) { shaderConfig.options.mslOptions.sample_dref_lod_array_as_grad = true; } @@ -2580,6 +2592,8 @@ void serialize(Archive & archive, CompilerMSL::Options& opt) { opt.force_sample_rate_shading, opt.manual_helper_invocation_updates, opt.check_discarded_frag_stores, + opt.force_fragment_with_side_effects_execution, + opt.input_attachment_is_ds_attachment, opt.sample_dref_lod_array_as_grad, opt.replace_recursive_inputs, opt.agx_manual_cube_grad_fixup); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index f0bf37da8..8315ea0fe 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -67,10 +67,10 @@ class MVKRenderSubpass : public MVKBaseObject { bool isColorAttachmentAlsoInputAttachment(uint32_t colorAttIdx); /** Returns whether or not the depth attachment is being used. */ - bool isDepthAttachmentUsed() { return _depthAttachment.attachment != VK_ATTACHMENT_UNUSED; } + bool isDepthAttachmentUsed() const { return _depthAttachment.attachment != VK_ATTACHMENT_UNUSED; } /** Returns whether or not the stencil attachment is being used. */ - bool isStencilAttachmentUsed() { return _stencilAttachment.attachment != VK_ATTACHMENT_UNUSED; } + bool isStencilAttachmentUsed() const { return _stencilAttachment.attachment != VK_ATTACHMENT_UNUSED; } /** Return the depth attachment format. */ VkFormat getDepthFormat(); @@ -90,6 +90,9 @@ class MVKRenderSubpass : public MVKBaseObject { /** Returns whether or not this is a multiview subpass. */ bool isMultiview() const { return _pipelineRenderingCreateInfo.viewMask != 0; } + /** Returns whether or not the depth stencil is being used as input attachment */ + bool isInputAttachmentDepthStencilAttachment() const { return _isInputAttachmentDepthStencilAttachment; } + /** Returns the multiview view mask. */ uint32_t getViewMask() const { return _pipelineRenderingCreateInfo.viewMask; } @@ -177,6 +180,7 @@ class MVKRenderSubpass : public MVKBaseObject { VkResolveModeFlagBits _stencilResolveMode = VK_RESOLVE_MODE_NONE; VkSampleCountFlagBits _defaultSampleCount = VK_SAMPLE_COUNT_1_BIT; uint32_t _subpassIndex; + bool _isInputAttachmentDepthStencilAttachment = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 1ff74a482..7ce1e909e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -438,6 +438,21 @@ _pipelineRenderingCreateInfo.pColorAttachmentFormats = _colorAttachmentFormats.data(); _pipelineRenderingCreateInfo.depthAttachmentFormat = getDepthFormat(); _pipelineRenderingCreateInfo.stencilAttachmentFormat = getStencilFormat(); + + // Needed to understand if we need to force the depth/stencil write to post fragment execution + // since Metal may try to do the write pre fragment exeuction which is against Vulkan + bool depthAttachmentUsed = isDepthAttachmentUsed(); + bool stencilAttachmentUsed = isStencilAttachmentUsed(); + for (uint32_t i = 0u; i < _inputAttachments.size(); ++i) { + bool isDepthInput = depthAttachmentUsed && (_inputAttachments[i].attachment == _depthAttachment.attachment) && + (_inputAttachments[i].aspectMask & _depthAttachment.aspectMask); + bool isStencilInput = stencilAttachmentUsed && (_inputAttachments[i].attachment == _stencilAttachment.attachment) && + (_inputAttachments[i].aspectMask & _stencilAttachment.aspectMask); + if (isDepthInput || isStencilInput) { + _isInputAttachmentDepthStencilAttachment = true; + break; + } + } } static const VkAttachmentReference2 _unusedAttachment = {VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, VK_ATTACHMENT_UNUSED, VK_IMAGE_LAYOUT_UNDEFINED, 0};