Merge pull request KhronosGroup#1744 from billhollings/fix-prefilling…

…-mem-leaks Fix memory leaks when configured for prefilling Metal command buffers.
Kegworks-App · Oct 18, 2022 · 260bad4 · 260bad4
2 parents 7662501 + 608342b
commit 260bad4
Show file tree

Hide file tree

Showing 9 changed files with 147 additions and 72 deletions.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
@@ -29,6 +29,8 @@ Released 2022/10/17
 - Support config option to automatically use Metal argument buffers when `VK_EXT_descriptor_indexing` 
  extension is enabled. `MVKConfiguration::useMetalArgumentBuffers` (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS`) 
  is now an enum field. The use of Metal argument buffers is still disabled by default (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER`).
+- Fix memory leaks when configured for prefilling Metal command buffers.
+- `MVKConfiguration`: Replace boolean `prefillMetalCommandBuffers` with enumeration.
 - `MVKPipeline`: Add builtins that are read but not written to tessellation pipelines.
 - Fix occasional crash from retention of `MVKSwapchain` for future drawable presentations.
 - Fix crash in `vkCreateSwapchainKHR()` on macOS 10.14 and earlier

diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -110,9 +110,18 @@ typedef enum MVKVkSemaphoreSupportStyle {
  MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE = 1, /**< Use Metal events (MTLEvent) when available on the platform, and where safe. This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE on some NVIDIA GPUs and Rosetta2, due to potential challenges with MTLEvents on those platforms, or in older environments where MTLEvents are not supported. */
  MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS = 2, /**< Always use Metal events (MTLEvent) when available on the platform. This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE in older environments where MTLEvents are not supported. */
  MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK = 3, /**< Use CPU callbacks upon GPU submission completion. This is the slowest technique, but allows multiple queues, compared to MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE. */
- MVK_CONFIG_VK_SEMAPHORE_MAX_ENUM  = 0x7FFFFFFF
+ MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_MAX_ENUM = 0x7FFFFFFF
 } MVKVkSemaphoreSupportStyle;
 
+/** Identifies the style of Metal command buffer pre-filling to be used. */
+typedef enum MVKPrefillMetalCommandBuffersStyle {
+ MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL = 0, /**< During Vulkan command buffer filling, do not prefill a Metal command buffer for each Vulkan command buffer. A single Metal command buffer is created and encoded for all the Vulkan command buffers included when vkQueueSubmit() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkQueueSubmit() is called. This is the fastest option, but potentially has the largest memory footprint. */
+ MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_DEFERRED_ENCODING = 1, /**< During Vulkan command buffer filling, encode to the Metal command buffer when vkEndCommandBuffer() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkEndCommandBuffer() is called. This option has the fastest performance, and the largest memory footprint, of the prefilling options using autorelease pools. */
+ MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING = 2, /**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, and do not retain any command content in the Vulkan command buffer. MoltenVK automatically creates and drains a Metal object autorelease pool for each and every command added to the Vulkan command buffer. This option has the smallest memory footprint, and the slowest performance, of the prefilling options using autorelease pools. */
+ MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE = 3, /**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, do not retain any command content in the Vulkan command buffer, and assume the app will ensure that each thread that fills commands into a Vulkan command buffer has a Metal autorelease pool. MoltenVK will not create and drain any autorelease pools during encoding. This is the fastest prefilling option, and generally has a small memory footprint, depending on when the app-provided autorelease pool drains. */
+ MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MAX_ENUM = 0x7FFFFFFF
+} MVKPrefillMetalCommandBuffersStyle;
+
 /**
  * MoltenVK configuration settings.
  *
@@ -206,25 +215,31 @@ typedef struct {
  VkBool32 synchronousQueueSubmits;
 
  /**
- * If enabled, where possible, a Metal command buffer will be created and filled when each
- * Vulkan command buffer is filled. For applications that parallelize the filling of Vulkan
+ * If set to MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL, a single Metal
+ * command buffer will be created and filled when the Vulkan command buffers are submitted
+ * to the Vulkan queue. This allows a single Metal command buffer to be used for all of the
+ * Vulkan command buffers in a queue submission. The Metal command buffer is filled on the
+ * thread that processes the command queue submission.
+ *
+ * If set to any value other than MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL,
+ * where possible, a Metal command buffer will be created and filled when each Vulkan
+ * command buffer is filled. For applications that parallelize the filling of Vulkan
  * command buffers across multiple threads, this allows the Metal command buffers to also
  * be filled on the same parallel thread. Because each command buffer is filled separately,
- * this requires that each Vulkan command buffer requires a dedicated Metal command buffer.
+ * this requires that each Vulkan command buffer have a dedicated Metal command buffer.
  *
- * If disabled, a single Metal command buffer will be created and filled when the Vulkan
- * command buffers are submitted to the Vulkan queue. This allows a single Metal command
- * buffer to be used for all of the Vulkan command buffers in a queue submission. The
- * Metal command buffer is filled on the thread that processes the command queue submission.
+ * See the definition of the MVKPrefillMetalCommandBuffersStyle enumeration above for
+ * descriptions of the various values that can be used for this setting. The differences
+ * are primarily distinguished by how memory recovery is handled for autoreleased Metal
+ * objects that are created under the covers as the commands added to the Vulkan command
+ * buffer are encoded into the corresponding Metal command buffer. You can decide whether
+ * your app will recover all autoreleased Metal objects, or how aggressively MoltenVK should
+ * recover autoreleased Metal objects, based on your approach to command buffer filling.
  *
  * Depending on the nature of your application, you may find performance is improved by filling
  * the Metal command buffers on parallel threads, or you may find that performance is improved by
  * consolidating all Vulkan command buffers onto a single Metal command buffer during queue submission.
  *
- * Prefilling of a Metal command buffer will not occur during the filling of secondary command
- * buffers (VK_COMMAND_BUFFER_LEVEL_SECONDARY), or for primary command buffers that are intended
- * to be submitted to multiple queues concurrently (VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT).
- *
  * When enabling this feature, be aware that one Metal command buffer is required for each Vulkan
  * command buffer. Depending on the number of command buffers that you use, you may also need to
  * change the value of the maxActiveMetalCommandBuffersPerQueue setting.
@@ -235,6 +250,10 @@ typedef struct {
  * the concept of being reset after being filled. Depending on when and how often you do this,
  * it may cause unexpected visual artifacts and unnecessary GPU load.
  *
+ * Prefilling of a Metal command buffer will not occur during the filling of secondary command
+ * buffers (VK_COMMAND_BUFFER_LEVEL_SECONDARY), or for primary command buffers that are intended
+ * to be submitted to multiple queues concurrently (VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT).
+ *
  * This feature is incompatible with updating descriptors after binding. If any of the
  * *UpdateAfterBind feature flags of VkPhysicalDeviceDescriptorIndexingFeatures or
  * VkPhysicalDeviceInlineUniformBlockFeatures have been enabled, the value of this
@@ -243,14 +262,15 @@ typedef struct {
  * The value of this parameter may be changed at any time during application runtime,
  * and the changed value will immediately affect subsequent MoltenVK behaviour.
  * Specifically, this parameter can be enabled when filling some command buffers,
- * and disabled when filling others.
+ * and disabled when later filling others.
  *
  * The initial value of this parameter is set by the
  * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS
  * runtime environment variable or MoltenVK compile-time build setting.
- * If neither is set, the value of this parameter defaults to false.
+ * If neither is set, the value of this parameter defaults to
+ * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL.
  */
- VkBool32 prefillMetalCommandBuffers;
+ MVKPrefillMetalCommandBuffersStyle prefillMetalCommandBuffers;
 
  /**
  * The maximum number of Metal command buffers that can be concurrently active per Vulkan queue.

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -96,16 +96,16 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
  void addCommand(MVKCommand* command);
 
  /** Returns the number of commands currently in this command buffer. */
- inline uint32_t getCommandCount() { return _commandCount; }
+ uint32_t getCommandCount() { return _commandCount; }
 
  /** Returns the command pool backing this command buffer. */
- inline MVKCommandPool* getCommandPool() { return _commandPool; }
+ MVKCommandPool* getCommandPool() { return _commandPool; }
 
  /** Submit the commands in this buffer as part of the queue submission. */
  void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
 
  /** Returns whether this command buffer can be submitted to a queue more than once. */
- inline bool getIsReusable() { return _isReusable; }
+ bool getIsReusable() { return _isReusable; }
 
  /**
  * Metal requires that a visibility buffer is established when a render pass is created, 
@@ -159,13 +159,13 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
  * Returns a reference to this object suitable for use as a Vulkan API handle.
  * This is the complement of the getMVKCommandBuffer() method.
  */
- inline VkCommandBuffer getVkCommandBuffer() { return (VkCommandBuffer)getVkHandle(); }
+ VkCommandBuffer getVkCommandBuffer() { return (VkCommandBuffer)getVkHandle(); }
 
  /**
  * Retrieves the MVKCommandBuffer instance referenced by the VkCommandBuffer handle.
  * This is the complement of the getVkCommandBuffer() method.
  */
- static inline MVKCommandBuffer* getMVKCommandBuffer(VkCommandBuffer vkCommandBuffer) {
+ static MVKCommandBuffer* getMVKCommandBuffer(VkCommandBuffer vkCommandBuffer) {
  return (MVKCommandBuffer*)getDispatchableObject(vkCommandBuffer);
  }
 
@@ -177,12 +177,11 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
  void propagateDebugName() override {}
  void init(const VkCommandBufferAllocateInfo* pAllocateInfo);
  bool canExecute();
- bool canPrefill();
- void prefill();
  void clearPrefilledMTLCommandBuffer();
  void releaseCommands(MVKCommand* command);
  void releaseRecordedCommands();
- void flushImmediateCmdEncoder();
+ void flushImmediateCmdEncoder();
+ void checkDeferredEncoding();
 
  MVKCommand* _head = nullptr;
  MVKCommand* _tail = nullptr;
@@ -471,18 +470,24 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
 
 #pragma mark Construction
 
- MVKCommandEncoder(MVKCommandBuffer* cmdBuffer);
+ MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
+ MVKPrefillMetalCommandBuffersStyle prefillStyle = MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL);
+
+ ~MVKCommandEncoder() override;
 
 protected:
  void addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount);
  void finishQueries();
  void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
  void clearRenderArea();
- NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
+ void encodeCommandsImpl(MVKCommand* command);
  void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
  void encodeTimestampStageCounterSamples();
  id<MTLFence> getStageCountersMTLFence();
  MVKArrayRef<MTLSamplePosition> getCustomSamplePositions();
+ NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
+ template<typename T> void retainIfImmediatelyEncoding(T& mtlEnc);
+ template<typename T> void endMetalEncoding(T& mtlEnc);
 
  typedef struct GPUCounterQuery {
  MVKGPUCounterQueryPool* queryPool = nullptr;
@@ -506,12 +511,13 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
  MVKPushConstantsCommandEncoderState _fragmentPushConstants;
  MVKPushConstantsCommandEncoderState _computePushConstants;
  MVKOcclusionQueryCommandEncoderState _occlusionQueryState;
+ MVKPrefillMetalCommandBuffersStyle _prefillStyle;
  VkSubpassContents _subpassContents;
- MVKCommandUse _mtlComputeEncoderUse;
- MVKCommandUse _mtlBlitEncoderUse;
  uint32_t _renderSubpassIndex;
  uint32_t _multiviewPassIndex;
- uint32_t _flushCount = 0;
+ uint32_t _flushCount;
+ MVKCommandUse _mtlComputeEncoderUse;
+ MVKCommandUse _mtlBlitEncoderUse;
  bool _isRenderingEntireAttachment;
 };