diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index bfb1c4ac5..a8888c93c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -141,7 +141,7 @@ case kMVKGraphicsStageVertex: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer offset: vtxOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]]; @@ -171,18 +171,18 @@ case kMVKGraphicsStageTessControl: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]]; } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer offset: tcPatchOutBuff->_offset atIndex: pipeline->getTessCtlPatchOutputBufferIndex()]; } - tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf)); + tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); [mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer offset: tcLevelBuff->_offset atIndex: pipeline->getTessCtlLevelBufferIndex()]; @@ -340,7 +340,7 @@ case kMVKGraphicsStageVertex: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer offset: vtxOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]]; @@ -373,18 +373,18 @@ case kMVKGraphicsStageTessControl: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]]; } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer offset: tcPatchOutBuff->_offset atIndex: pipeline->getTessCtlPatchOutputBufferIndex()]; } - tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf)); + tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); [mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer offset: tcLevelBuff->_offset atIndex: pipeline->getTessCtlLevelBufferIndex()]; @@ -554,21 +554,21 @@ } paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; - tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; mtlIndBuffOfst = tempIndirectBuff->_offset; - tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize); + tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); } if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); } - tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf)); + tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth; NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; @@ -580,7 +580,7 @@ } else if (needsInstanceAdjustment) { // In this case, we need to adjust the instance count for the views being drawn. VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount; - tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; mtlIndBuffOfst = tempIndirectBuff->_offset; } @@ -869,22 +869,22 @@ } paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; - tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; mtlTempIndBuffOfst = tempIndirectBuff->_offset; - tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize); + tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); } if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); } - tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf)); - vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length); + tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); + vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true); id vtxState; vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State(); @@ -899,7 +899,7 @@ } else if (needsInstanceAdjustment) { // In this case, we need to adjust the instance count for the views being drawn. VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount; - tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; mtlTempIndBuffOfst = tempIndirectBuff->_offset; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 8fef07aa0..5b3d1b611 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -370,7 +370,7 @@ class MVKCommandEncoder : public MVKBaseDeviceObject { void setComputeBytes(id mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex); /** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */ - const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false); + const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool isPrivate = false, bool isDedicated = false); /** Returns the command encoding pool. */ MVKCommandEncodingPool* getCommandEncodingPool(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 032e5c27b..c90647bb8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -337,7 +337,7 @@ getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride); if (_cmdBuffer->_needsVisibilityResultMTLBuffer) { if (!_visibilityResultMTLBuffer) { - _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true); + _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true); } mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer; } @@ -647,8 +647,8 @@ } } -const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) { - const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated); +const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isPrivate, bool isDedicated) { + const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated); MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool(); // Return the MTLBuffer allocation to the pool once the command buffer is done with it diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h index adaa9e0a4..2b3e8aeff 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h @@ -66,7 +66,7 @@ class MVKCommandEncodingPool : public MVKBaseObject { * To return the returned allocation back to the pool to be reused, * call the returnToPool() function on the returned allocation. */ - const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false); + const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false); /** * Returns a MTLRenderPipelineState dedicated to rendering to several attachments @@ -153,6 +153,7 @@ class MVKCommandEncodingPool : public MVKBaseObject { std::unordered_map _transferBuffers; std::unordered_map _transferBufferMemory; MVKMTLBufferAllocator _mtlBufferAllocator; + MVKMTLBufferAllocator _privateMtlBufferAllocator; MVKMTLBufferAllocator _dedicatedMtlBufferAllocator; id _cmdClearDepthOnlyDepthStencilState = nil; id _cmdClearStencilOnlyDepthStencilState = nil; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm index 39efae65f..16f702ad3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm @@ -77,10 +77,14 @@ MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil)); } -const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) { +const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) { + MVKAssert(isPrivate || !isDedicated, "Dedicated, host-shared temporary buffers are not supported."); if (isDedicated) { return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length); } + if (isPrivate) { + return _privateMtlBufferAllocator.acquireMTLBufferRegion(length); + } return _mtlBufferAllocator.acquireMTLBufferRegion(length); } @@ -163,7 +167,8 @@ static inline uint32_t getClearStateIndex(MVKFormatType type) { MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool), _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true), - _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) { + _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate), + _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) { } MVKCommandEncodingPool::~MVKCommandEncodingPool() { diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 84ca12169..c9bc94490 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -86,7 +86,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool MVKMTLBufferAllocation* newObject() override; /** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */ - MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated); + MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated); ~MVKMTLBufferAllocationPool() override; @@ -97,6 +97,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool NSUInteger _nextOffset; NSUInteger _allocationLength; NSUInteger _mtlBufferLength; + MTLStorageMode _mtlStorageMode; MVKSmallVector, 64> _mtlBuffers; MVKDevice* _device; }; @@ -137,7 +138,7 @@ class MVKMTLBufferAllocator : public MVKBaseDeviceObject { * next power-of-two value that is at least as big as the specified maximum size. * If makeThreadSafe is true, a lock will be applied when an allocation is acquired. */ - MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false); + MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false, MTLStorageMode mtlStorageMode = MTLStorageModeShared); ~MVKMTLBufferAllocator() override; diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm index a36a50eae..91fb2b1c3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm @@ -44,17 +44,18 @@ // Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it void MVKMTLBufferAllocationPool::addMTLBuffer() { - MTLResourceOptions mbOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache; + MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache; _mtlBuffers.push_back([_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts]); _nextOffset = 0; } -MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated) +MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated) : MVKObjectPool(true) { _device = device; _allocationLength = allocationLength; _mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount()); + _mtlStorageMode = mtlStorageMode; _nextOffset = _mtlBufferLength; // Force a MTLBuffer to be added on first access } @@ -89,7 +90,7 @@ return region; } -MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) { +MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) { _maxAllocationLength = maxRegionLength; _makeThreadSafe = makeThreadSafe; @@ -100,7 +101,7 @@ _regionPools.reserve(maxP2Exp + 1); NSUInteger allocLen = 1; for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) { - _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated)); + _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, mtlStorageMode, isDedicated)); allocLen <<= 1; } }