From b751c07eb393a54f84c7b7e76401efff6461b43d Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Sun, 16 Oct 2022 17:03:55 -0400
Subject: [PATCH 1/2] Fix memory leaks when configured for prefilling Metal
 command buffers.

- Add MVKPrefillMetalCommandBuffersStyle enumeration to enable prefilling,
  and configure memory recovery options.
- Redefine MVKConfiguration::prefillMetalCommandBuffers as a value from
  MVKPrefillMetalCommandBuffersStyle instead of a simple boolean. Set the
  values of MVKPrefillMetalCommandBuffersStyle and the default value of
  MVKConfiguration::prefillMetalCommandBuffers to be compatible with
  legacy use of this setting as a simple enable/disable boolean value.
- MVKCommandEncoder track prefill style, and support option to maintain
  a Metal autorelease pool that is created on vkBeginCommandBuffer()
  and drained on vkEndCommandBuffer(), when prefiling a command buffer
  from a single thread, and an option to wrap each command addition in
  a separate @autoreleasepool{} scope to create and drain an autorelease
  pool at every command addition to the Vulkan command buffer, when
  prefilling a single command buffer from multiple threads.
- MVKCommandEncoder retain Metal encoders when prefilling, because
  prefilling may span multiple autorelease pools.
- MVKCommandEncoder clean up memory leak of MTLBlitPassDescriptor
  during prefilling.
- MVKCommandEncoder rearrange declarations of MVKCommandUse member
  variables to tighten up class memory use (unrelated).
- MVKCommandBuffer remove unnecessary inline declarations (unrelated).
- Change MoltenVK logging to use stdout instead of stderr (unrelated).
---
 Docs/Whats_New.md                             |  2 +
 MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h       | 40 ++++++---
 MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 28 ++++---
 .../MoltenVK/Commands/MVKCommandBuffer.mm     | 82 ++++++++++++-------
 MoltenVK/MoltenVK/Utility/MVKBaseObject.mm    |  2 +-
 MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp  |  2 +-
 MoltenVK/MoltenVK/Utility/MVKEnvironment.h    |  4 +-
 7 files changed, 105 insertions(+), 55 deletions(-)

diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 98106d967..38fffccbf 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -29,6 +29,8 @@ Released TBD
 - Support config option to automatically use Metal argument buffers when `VK_EXT_descriptor_indexing` 
   extension is enabled. `MVKConfiguration::useMetalArgumentBuffers` (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS`) 
   is now an enum field. The use of Metal argument buffers is still disabled by default (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER`).
+- Fix memory leaks when configured for prefilling Metal command buffers.
+- `MVKConfiguration` replace boolean `prefillMetalCommandBuffers` with enumeration.
 - Fix occassional crash from retention of `MVKSwapchain` for future drawable presentations.
 - Fix undefined reference to `vkGetBufferDeviceAddressEXT` when building with `MVK_HIDE_VULKAN_SYMBOLS=1`.
 - Update `Makefile` to forward any build setting declared on the command line to Xcode.
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index c97b41c26..0bc2ed46a 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -110,9 +110,18 @@ typedef enum MVKVkSemaphoreSupportStyle {
 	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE = 1,	/**< Use Metal events (MTLEvent) when available on the platform, and where safe. This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE on some NVIDIA GPUs and Rosetta2, due to potential challenges with MTLEvents on those platforms, or in older environments where MTLEvents are not supported. */
 	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS            = 2,	/**< Always use Metal events (MTLEvent) when available on the platform. This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE in older environments where MTLEvents are not supported. */
 	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK                = 3,	/**< Use CPU callbacks upon GPU submission completion. This is the slowest technique, but allows multiple queues, compared to MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE. */
-	MVK_CONFIG_VK_SEMAPHORE_MAX_ENUM                              = 0x7FFFFFFF
+	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_MAX_ENUM                = 0x7FFFFFFF
 } MVKVkSemaphoreSupportStyle;
 
+/** Identifies the style of Metal command buffer pre-filling to be used. */
+typedef enum MVKPrefillMetalCommandBuffersStyle {
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL                = 0,	/**< Do not prefill a Metal command buffer during Vulkan command buffer filling. */
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_SINGLE_THREAD_AUTORELEASE = 1,	/**< Fill a Metal command buffer during Vulkan command buffer filling, and even though multiple threads can be used to fill multiple Vulkan command buffers, assume for any particular Vulkan command buffer that vkBeginCommandBuffer(), vkEndCommandBuffer() and all commands will be filled from a single thread. MoltenVK creates a Metal object autorelease pool on vkBeginCommandBuffer() and drains it on vkEndCommandBuffer(). This is the typical prefilling scenarios. */
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MULTI_THREAD_AUTORELEASE  = 2,	/**< Fill a Metal command buffer during Vulkan command buffer filling, and assume for any particular Vulkan command buffer that commands will be filled from multiple threads. MoltenVK automatically creates and drains a Metal object autorelease pool for each and every command added to the Vulkan command buffer. This the most general, but slowest, option. */
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_AUTORELEASE            = 3,	/**< Fill a Metal command buffer during Vulkan command buffer filling, and assume the app will ensure that each thread that fills commands into a Vulkan command buffer has a Metal autorelease pool. MoltenVK will not create and drain any autorelease pools during prefilling. This is the fastest prefilling option. */
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MAX_ENUM                  = 0x7FFFFFFF
+} MVKPrefillMetalCommandBuffersStyle;
+
 /**
  * MoltenVK configuration settings.
  *
@@ -206,16 +215,26 @@ typedef struct {
 	VkBool32 synchronousQueueSubmits;
 
 	/**
-	 * If enabled, where possible, a Metal command buffer will be created and filled when each
-	 * Vulkan command buffer is filled. For applications that parallelize the filling of Vulkan
+	 * If set to MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL, a single Metal
+	 * command buffer will be created and filled when the Vulkan command buffers are submitted
+	 * to the Vulkan queue. This allows a single Metal command buffer to be used for all of the
+	 * Vulkan command buffers in a queue submission. The Metal command buffer is filled on the
+	 * thread that processes the command queue submission.
+	 *
+	 * If set to any value other than MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL,
+	 * where possible, a Metal command buffer will be created and filled when each Vulkan
+	 * command buffer is filled. For applications that parallelize the filling of Vulkan
 	 * commmand buffers across multiple threads, this allows the Metal command buffers to also
 	 * be filled on the same parallel thread. Because each command buffer is filled separately,
 	 * this requires that each Vulkan command buffer requires a dedicated Metal command buffer.
 	 *
-	 * If disabled, a single Metal command buffer will be created and filled when the Vulkan
-	 * command buffers are submitted to the Vulkan queue. This allows a single Metal command
-	 * buffer to be used for all of the Vulkan command buffers in a queue submission. The
-	 * Metal command buffer is filled on the thread that processes the command queue submission.
+	 * See the definition of the MVKPrefillMetalCommandBuffersStyle enumeration above for
+	 * descriptions of the various values that can be used for this setting. The differences
+	 * are primarily distinguished by how memory recovery is handled for autoreleased Metal
+	 * objects that are created under the covers as the commands added to the Vulkan command
+	 * buffer are encoded into the corresponding Metal command buffer. You can decide whether
+	 * your app will recover all autoreleased Metal objects, or how agressively MoltenVK should
+	 * recover autoreleased Metal objects, based on your multi-threaded command buffer filling policy.
 	 *
 	 * Depending on the nature of your application, you may find performance is improved by filling
 	 * the Metal command buffers on parallel threads, or you may find that performance is improved by
@@ -243,14 +262,15 @@ typedef struct {
 	 * The value of this parameter may be changed at any time during application runtime,
 	 * and the changed value will immediately effect subsequent MoltenVK behaviour.
 	 * Specifically, this parameter can be enabled when filling some command buffers,
-	 * and disabled when filling others.
+	 * and disabled when later filling others.
 	 *
 	 * The initial value or this parameter is set by the
 	 * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS
 	 * runtime environment variable or MoltenVK compile-time build setting.
-	 * If neither is set, the value of this parameter defaults to false.
+	 * If neither is set, the value of this parameter defaults to
+	 * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL.
 	 */
-	VkBool32 prefillMetalCommandBuffers;
+	MVKPrefillMetalCommandBuffersStyle prefillMetalCommandBuffers;
 
 	/**
 	 * The maximum number of Metal command buffers that can be concurrently active per Vulkan queue.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index c5fb69143..ae296ee4a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -96,16 +96,16 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
 	void addCommand(MVKCommand* command);
 
 	/** Returns the number of commands currently in this command buffer. */
-	inline uint32_t getCommandCount() { return _commandCount; }
+	uint32_t getCommandCount() { return _commandCount; }
 
 	/** Returns the command pool backing this command buffer. */
-	inline MVKCommandPool* getCommandPool() { return _commandPool; }
+	MVKCommandPool* getCommandPool() { return _commandPool; }
 
 	/** Submit the commands in this buffer as part of the queue submission. */
 	void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
 
     /** Returns whether this command buffer can be submitted to a queue more than once. */
-    inline bool getIsReusable() { return _isReusable; }
+    bool getIsReusable() { return _isReusable; }
 
     /**
      * Metal requires that a visibility buffer is established when a render pass is created, 
@@ -159,13 +159,13 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
      * Returns a reference to this object suitable for use as a Vulkan API handle.
      * This is the compliment of the getMVKCommandBuffer() method.
      */
-    inline VkCommandBuffer getVkCommandBuffer() { return (VkCommandBuffer)getVkHandle(); }
+	VkCommandBuffer getVkCommandBuffer() { return (VkCommandBuffer)getVkHandle(); }
 
     /**
      * Retrieves the MVKCommandBuffer instance referenced by the VkCommandBuffer handle.
      * This is the compliment of the getVkCommandBuffer() method.
      */
-    static inline MVKCommandBuffer* getMVKCommandBuffer(VkCommandBuffer vkCommandBuffer) {
+    static MVKCommandBuffer* getMVKCommandBuffer(VkCommandBuffer vkCommandBuffer) {
         return (MVKCommandBuffer*)getDispatchableObject(vkCommandBuffer);
     }
 
@@ -177,8 +177,6 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject,
 	void propagateDebugName() override {}
 	void init(const VkCommandBufferAllocateInfo* pAllocateInfo);
 	bool canExecute();
-	bool canPrefill();
-	void prefill();
 	void clearPrefilledMTLCommandBuffer();
     void releaseCommands(MVKCommand* command);
 	void releaseRecordedCommands();
@@ -471,18 +469,22 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
 
 #pragma mark Construction
 
-	MVKCommandEncoder(MVKCommandBuffer* cmdBuffer);
+	MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
+					  MVKPrefillMetalCommandBuffersStyle prefillStyle = MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL);
+
+	~MVKCommandEncoder() override;
 
 protected:
     void addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount);
     void finishQueries();
 	void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
 	void clearRenderArea();
-    NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
+	void encodeCommandsImpl(MVKCommand* command);
 	void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
 	void encodeTimestampStageCounterSamples();
 	id<MTLFence> getStageCountersMTLFence();
 	MVKArrayRef<MTLSamplePosition> getCustomSamplePositions();
+	NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
 
 	typedef struct GPUCounterQuery {
 		MVKGPUCounterQueryPool* queryPool = nullptr;
@@ -506,12 +508,14 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
 	MVKPushConstantsCommandEncoderState _fragmentPushConstants;
 	MVKPushConstantsCommandEncoderState _computePushConstants;
     MVKOcclusionQueryCommandEncoderState _occlusionQueryState;
+	NSAutoreleasePool* _prefillAutoreleasePool;
+	MVKPrefillMetalCommandBuffersStyle _prefillStyle;
 	VkSubpassContents _subpassContents;
-	MVKCommandUse _mtlComputeEncoderUse;
-	MVKCommandUse _mtlBlitEncoderUse;
 	uint32_t _renderSubpassIndex;
 	uint32_t _multiviewPassIndex;
-    uint32_t _flushCount = 0;
+    uint32_t _flushCount;
+	MVKCommandUse _mtlComputeEncoderUse;
+	MVKCommandUse _mtlBlitEncoderUse;
 	bool _isRenderingEntireAttachment;
 };
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 0f4c5f530..714bb5ceb 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -95,18 +95,13 @@
 		}
 	}
 
-    if(canPrefill()) {
-        @autoreleasepool {
-            uint32_t qIdx = 0;
-            _prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx);    // retain
-            
-            _immediateCmdEncodingContext = new MVKCommandEncodingContext;
-            
-            _immediateCmdEncoder = new MVKCommandEncoder(this);
-            _immediateCmdEncoder->beginEncoding(_prefilledMTLCmdBuffer, _immediateCmdEncodingContext);
-        }
+    if(_device->shouldPrefillMTLCommandBuffers() && !(_isSecondary || _supportsConcurrentExecution)) {
+        _prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(0);    // retain
+		_immediateCmdEncodingContext = new MVKCommandEncodingContext;
+		_immediateCmdEncoder = new MVKCommandEncoder(this, mvkConfig().prefillMetalCommandBuffers);
+		_immediateCmdEncoder->beginEncoding(_prefilledMTLCmdBuffer, _immediateCmdEncodingContext);
     }
-    
+
     return getConfigurationResult();
 }
 
@@ -177,7 +172,6 @@
 
     if(_immediateCmdEncoder) {
         _immediateCmdEncoder->encodeCommands(command);
-        
         if( !_isReusable ) {
             releaseCommands(command);
             return;
@@ -225,11 +219,6 @@
 	return true;
 }
 
-bool MVKCommandBuffer::canPrefill() {
-	bool wantPrefill = _device->shouldPrefillMTLCommandBuffers();
-	return wantPrefill && !(_isSecondary || _supportsConcurrentExecution);
-}
-
 void MVKCommandBuffer::clearPrefilledMTLCommandBuffer() {
 
 	// Metal command buffers do not return to their pool on release, nor do they support the
@@ -333,7 +322,18 @@
     setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName);
 }
 
+// Multithread autorelease prefill style uses a dedicated autorelease pool when encoding each command.
 void MVKCommandEncoder::encodeCommands(MVKCommand* command) {
+	if (_prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MULTI_THREAD_AUTORELEASE) {
+		@autoreleasepool {
+			encodeCommandsImpl(command);
+		}
+	} else {
+		encodeCommandsImpl(command);
+	}
+}
+
+void MVKCommandEncoder::encodeCommandsImpl(MVKCommand* command) {
     while(command) {
         uint32_t prevMVPassIdx = _multiviewPassIndex;
         command->encode(this);
@@ -466,6 +466,16 @@
 	_dynamicSamplePositions.assign(dynamicSamplePositions.begin(), dynamicSamplePositions.end());
 }
 
+// Retain encoders when prefilling, because prefilling may span multiple autorelease pools.
+#define retainIfPrefilling(mtlEncIVar)            \
+	if (_prefillStyle) { [mtlEncIVar retain]; }
+
+#define endMetalEncoding(mtlEncIVar)              \
+	[mtlEncIVar endEncoding];                     \
+	if (_prefillStyle) { [mtlEncIVar release]; }  \
+	mtlEncIVar = nil
+
+
 // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder.
 void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
 
@@ -525,7 +535,8 @@
 		[mtlRPDesc setSamplePositions: cstmSampPosns.data count: cstmSampPosns.size];
 	}
 
-    _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];     // not retained
+    _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
+	retainIfPrefilling(_mtlRenderEncoder);
 	setLabelIfNotNil(_mtlRenderEncoder, getMTLRenderCommandEncoderName(cmdUse));
 
 	// We shouldn't clear the render area if we are restarting the Metal renderpass
@@ -748,8 +759,7 @@
     if (_mtlRenderEncoder == nil) { return; }
 
 	if (_cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlRenderEncoder updateFence: getStageCountersMTLFence() afterStages: MTLRenderStageFragment]; }
-    [_mtlRenderEncoder endEncoding];
-	_mtlRenderEncoder = nil;    // not retained
+	endMetalEncoding(_mtlRenderEncoder);
 
 	getSubpass()->resolveUnresolvableAttachments(this, _attachments.contents());
 
@@ -776,13 +786,11 @@
 	_computePushConstants.markDirty();
 
 	if (_mtlComputeEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; }
-	[_mtlComputeEncoder endEncoding];
-	_mtlComputeEncoder = nil;       // not retained
+	endMetalEncoding(_mtlComputeEncoder);
 	_mtlComputeEncoderUse = kMVKCommandUseNone;
 
 	if (_mtlBlitEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlBlitEncoder updateFence: getStageCountersMTLFence()]; }
-	[_mtlBlitEncoder endEncoding];
-	_mtlBlitEncoder = nil;          // not retained
+	endMetalEncoding(_mtlBlitEncoder);
     _mtlBlitEncoderUse = kMVKCommandUseNone;
 
 	encodeTimestampStageCounterSamples();
@@ -791,7 +799,8 @@
 id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
 	if ( !_mtlComputeEncoder ) {
 		endCurrentMetalEncoding();
-		_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];		// not retained
+		_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];
+		retainIfPrefilling(_mtlComputeEncoder);
 		beginMetalComputeEncoding(cmdUse);
 	}
 	if (_mtlComputeEncoderUse != cmdUse) {
@@ -804,7 +813,8 @@
 id<MTLBlitCommandEncoder> MVKCommandEncoder::getMTLBlitEncoder(MVKCommandUse cmdUse) {
 	if ( !_mtlBlitEncoder ) {
 		endCurrentMetalEncoding();
-		_mtlBlitEncoder = [_mtlCmdBuffer blitCommandEncoder];   // not retained
+		_mtlBlitEncoder = [_mtlCmdBuffer blitCommandEncoder];
+		retainIfPrefilling(_mtlBlitEncoder);
 	}
     if (_mtlBlitEncoderUse != cmdUse) {
         _mtlBlitEncoderUse = cmdUse;
@@ -978,7 +988,7 @@
 		// in Xcode 13 as inaccurate for all platforms. Leave this value at 1 until we can figure out how to
 		// accurately determine the length of sampleBufferAttachments on each platform.
 		uint32_t maxMTLBlitPassSampleBuffers = 1;		// Was MTLMaxBlitPassSampleBuffers API definition
-		auto* bpDesc = [[[MTLBlitPassDescriptor alloc] init] autorelease];
+		auto* bpDesc = [MTLBlitPassDescriptor new];		// temp retained
 		for (uint32_t attIdx = 0; attIdx < maxMTLBlitPassSampleBuffers && qIdx < qCnt; attIdx++, qIdx++) {
 			auto* sbAttDesc = bpDesc.sampleBufferAttachments[attIdx];
 			auto& tsQry = _timestampStageCounterQueries[qIdx];
@@ -993,6 +1003,7 @@
 
 		auto* mtlEnc = [_mtlCmdBuffer blitCommandEncoderWithDescriptor: bpDesc];
 		setLabelIfNotNil(mtlEnc, mvkMTLBlitCommandEncoderLabel(kMVKCommandUseRecordGPUCounterSample));
+		[bpDesc release];		// Release temp object
 		[mtlEnc waitForFence: getStageCountersMTLFence()];
 		[mtlEnc fillBuffer: _device->getDummyBlitMTLBuffer() range: NSMakeRange(0, 1) value: 0];
 		[mtlEnc endEncoding];
@@ -1046,7 +1057,8 @@
 
 #pragma mark Construction
 
-MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
+MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
+									 MVKPrefillMetalCommandBuffersStyle prefillStyle) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
         _cmdBuffer(cmdBuffer),
         _graphicsPipelineState(this),
         _computePipelineState(this),
@@ -1063,8 +1075,11 @@
         _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
         _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT),
         _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT),
-        _occlusionQueryState(this) {
+        _occlusionQueryState(this),
+		_prefillStyle(prefillStyle){
 
+			_prefillAutoreleasePool = (_prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_SINGLE_THREAD_AUTORELEASE
+									   ? [NSAutoreleasePool new] : nil);		// retained
             _pDeviceFeatures = &_device->_enabledFeatures;
             _pDeviceMetalFeatures = _device->_pMetalFeatures;
             _pDeviceProperties = _device->_pProperties;
@@ -1078,6 +1093,15 @@
             _mtlBlitEncoderUse = kMVKCommandUseNone;
 			_pEncodingContext = nullptr;
 			_stageCountersMTLFence = nil;
+			_flushCount = 0;
+}
+
+MVKCommandEncoder::~MVKCommandEncoder() {
+	[_prefillAutoreleasePool release];
+	[_mtlRenderEncoder release];
+	[_mtlComputeEncoder release];
+	[_mtlBlitEncoder release];
+	// _stageCountersMTLFence is released after Metal command buffer completion
 }
 
 
diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm
index 7d6806750..2df4ab203 100644
--- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm
+++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm
@@ -100,7 +100,7 @@
 	va_end(origArgs);
 
 	// Log the message to the standard error stream
-	if (shouldLog) { fprintf(stderr, "[%s] %s\n", getReportingLevelString(logLevel), pMessage); }
+	if (shouldLog) { fprintf(stdout, "[%s] %s\n", getReportingLevelString(logLevel), pMessage); }
 
 	// Broadcast the message to any Vulkan debug report callbacks
 	if (hasDebugCallbacks) { mvkInst->debugReportMessage(mvkAPIObj, logLevel, pMessage); }
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
index c3a8805cc..255fb0543 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@@ -30,7 +30,7 @@ static void mvkInitConfigFromEnvVars() {
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.debugMode,                              MVK_DEBUG);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.shaderConversionFlipVertexY,            MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.synchronousQueueSubmits,                MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS);
-	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.prefillMetalCommandBuffers,             MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS);
+	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.prefillMetalCommandBuffers,             MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS);
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.maxActiveMetalCommandBuffersPerQueue,   MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.supportLargeQueryPools,                 MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.presentWithCommandBuffer,               MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER);
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 06abb1702..f7b36ff5f 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -105,9 +105,9 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig);
 #   define MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS    mvkOSVersionIsAtLeast(MVK_CONFIG_MTLEVENT_MIN_OS)
 #endif
 
-/** Fill a Metal command buffers when each Vulkan command buffer is filled. */
+/** Fill a Metal command buffer when each Vulkan command buffer is filled. */
 #ifndef MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS
-#   define MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS    0
+#   define MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS    MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL
 #endif
 
 /**

From 608342b5287a77af53cf437b786efd6cac7021ad Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Tue, 18 Oct 2022 12:41:13 -0400
Subject: [PATCH 2/2] Update documentation for prefilling Metal command buffers
 options.

---
 MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h |  2 +-
 README.md                               | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 2502c02f2..fa4d957f4 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -118,7 +118,7 @@ typedef enum MVKPrefillMetalCommandBuffersStyle {
 	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL                        = 0,	/**< During Vulkan command buffer filling, do not prefill a Metal command buffer for each Vulkan command buffer. A single Metal command buffer is created and encoded for all the Vulkan command buffers included when vkQueueSubmit() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkQueueSubmit() is called. This is the fastest option, but potentially has the largest memory footprint. */
 	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_DEFERRED_ENCODING                 = 1,	/**< During Vulkan command buffer filling, encode to the Metal command buffer when vkEndCommandBuffer() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkEndCommandBuffer() is called. This option has the fastest performance, and the largest memory footprint, of the prefilling options using autorelease pools. */
 	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING                = 2,	/**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, and do not retain any command content in the Vulkan command buffer. MoltenVK automatically creates and drains a Metal object autorelease pool for each and every command added to the Vulkan command buffer. This option has the smallest memory footprint, and the slowest performance, of the prefilling options using autorelease pools. */
-	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE = 3,	/**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, do not retain any command content in the Vulkan command buffer, and assume the app will ensure that each thread that fills commands into a Vulkan command buffer has a Metal autorelease pool. MoltenVK will not create and drain any autorelease pools during encoding. This is the fastest prefilling option, and has the smallest memory footprint of any option. */
+	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE = 3,	/**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, do not retain any command content in the Vulkan command buffer, and assume the app will ensure that each thread that fills commands into a Vulkan command buffer has a Metal autorelease pool. MoltenVK will not create and drain any autorelease pools during encoding. This is the fastest prefilling option, and generally has a small memory footprint, depending on when the app-provided autorelease pool drains. */
 	MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MAX_ENUM                          = 0x7FFFFFFF
 } MVKPrefillMetalCommandBuffersStyle;
 
diff --git a/README.md b/README.md
index 46cc9d4ff..6b2e2006e 100644
--- a/README.md
+++ b/README.md
@@ -419,14 +419,14 @@ behavior from the *Vulkan* specification.
 ### Memory Management
 
 *Metal*, and other *Objective-C* objects in *Apple's SDK* frameworks, use reference counting for memory management. 
-When instantiating *Objective-C* objects, it is important that you do not rely on implied *autorelease pools* to do 
-memory management for you. Because many *Vulkan* games and apps may be ported from other platforms, they will 
-typically not include autorelease pools in their threading models.
-
-Avoid the use of the `autorelease` method, or any object creation methods that imply use of `autorelease`,
-(eg- `[NSString stringWithFormat: ]`, etc). Instead, favour object creation methods that return a retained object
-(eg- `[[NSString alloc] initWithFormat: ]`, etc), and manually track and release those objects. If you need to use 
-autoreleased objects, wrap code blocks in an `@autoreleasepool {...}` block.
+As a contributor to **MoltenVK**, when instantiating *Objective-C* objects, it is important that you do not rely on 
+the app providing *autorelease pools* to do memory management for you. Because many *Vulkan* games and apps may be 
+ported from other platforms, they will often not automatically include autorelease pools in their threading models.
+
+As a contributor to **MoltenVK**, avoid the use of the *Metal* `autorelease` method, or any object *Metal* creation 
+methods that imply internal use of `autorelease`, (eg- `[NSString stringWithFormat: ]`, etc). Instead, favor object 
+creation methods that return a retained object (eg- `[[NSString alloc] initWithFormat: ]`, etc), and manually track 
+and release those objects. If you need to use autoreleased objects, wrap your code in an `@autoreleasepool {...}` block.
 
 
 ### Code Formatting