Merge pull request #1275 from cdavis5e/private-temp-buffers

MVKMTLBufferAllocation: Support private temp buffers.
KhronosGroup · Feb 22, 2021 · d2ddba1 · d2ddba1
2 parents 3058a13 + c225c42
commit d2ddba1
Show file tree

Hide file tree

Showing 7 changed files with 44 additions and 36 deletions.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
@@ -141,7 +141,7 @@
  case kMVKGraphicsStageVertex: {
  mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
  if (pipeline->needsVertexOutputBuffer()) {
- vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
+ vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
  offset: vtxOutBuff->_offset
  atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@@ -171,18 +171,18 @@
  case kMVKGraphicsStageTessControl: {
  mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
  if (pipeline->needsTessCtlOutputBuffer()) {
- tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
+ tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
  offset: tcOutBuff->_offset
  atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
  }
  if (pipeline->needsTessCtlPatchOutputBuffer()) {
- tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
+ tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
  offset: tcPatchOutBuff->_offset
  atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
  }
- tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
+ tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
  [mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
  offset: tcLevelBuff->_offset
  atIndex: pipeline->getTessCtlLevelBufferIndex()];
@@ -340,7 +340,7 @@
  case kMVKGraphicsStageVertex: {
  mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
  if (pipeline->needsVertexOutputBuffer()) {
- vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
+ vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
  offset: vtxOutBuff->_offset
  atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@@ -373,18 +373,18 @@
  case kMVKGraphicsStageTessControl: {
  mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
  if (pipeline->needsTessCtlOutputBuffer()) {
- tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
+ tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
  offset: tcOutBuff->_offset
  atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
  }
  if (pipeline->needsTessCtlPatchOutputBuffer()) {
- tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
+ tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
  [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
  offset: tcPatchOutBuff->_offset
  atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
  }
- tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
+ tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
  [mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
  offset: tcLevelBuff->_offset
  atIndex: pipeline->getTessCtlLevelBufferIndex()];
@@ -554,21 +554,21 @@
  }
  paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
  VkDeviceSize paramsSize = paramsIncr * _drawCount;
- tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+ tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
  mtlIndBuff = tempIndirectBuff->_mtlBuffer;
  mtlIndBuffOfst = tempIndirectBuff->_offset;
- tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
+ tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
  mtlParmBuffOfst = tcParamsBuff->_offset;
  if (pipeline->needsVertexOutputBuffer()) {
- vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
+ vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
  }
  if (pipeline->needsTessCtlOutputBuffer()) {
- tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
+ tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
  }
  if (pipeline->needsTessCtlPatchOutputBuffer()) {
- tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
+ tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
  }
- tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
+ tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
 
  vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
  NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
@@ -580,7 +580,7 @@
  } else if (needsInstanceAdjustment) {
  // In this case, we need to adjust the instance count for the views being drawn.
  VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount;
- tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+ tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
  mtlIndBuff = tempIndirectBuff->_mtlBuffer;
  mtlIndBuffOfst = tempIndirectBuff->_offset;
  }
@@ -869,22 +869,22 @@
  }
  paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
  VkDeviceSize paramsSize = paramsIncr * _drawCount;
- tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+ tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
  mtlIndBuff = tempIndirectBuff->_mtlBuffer;
  mtlTempIndBuffOfst = tempIndirectBuff->_offset;
- tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
+ tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
  mtlParmBuffOfst = tcParamsBuff->_offset;
  if (pipeline->needsVertexOutputBuffer()) {
- vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
+ vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
  }
  if (pipeline->needsTessCtlOutputBuffer()) {
- tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
+ tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
  }
  if (pipeline->needsTessCtlPatchOutputBuffer()) {
- tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
+ tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
  }
- tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
- vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length);
+ tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
+ vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true);
 
  id<MTLComputePipelineState> vtxState;
  vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
@@ -899,7 +899,7 @@
  } else if (needsInstanceAdjustment) {
  // In this case, we need to adjust the instance count for the views being drawn.
  VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount;
- tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+ tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
  mtlIndBuff = tempIndirectBuff->_mtlBuffer;
  mtlTempIndBuffOfst = tempIndirectBuff->_offset;
  }

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -370,7 +370,7 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
  void setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex);
 
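  /** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. If isPrivate is true, the buffer is taken from the allocator created with MTLStorageModePrivate instead of the default host-shared one. If isDedicated is true, the buffer is taken from the dedicated allocator, and isPrivate must also be true. */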
- const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false);
+ const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
 
  /** Returns the command encoding pool. */
  MVKCommandEncodingPool* getCommandEncodingPool();

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -337,7 +337,7 @@
  getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
  if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
  if (!_visibilityResultMTLBuffer) {
- _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true);
+ _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
  }
  mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
  }
@@ -647,8 +647,8 @@
  }
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) {
- const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated);
+const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isPrivate, bool isDedicated) {
+ const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated);
  MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool();
 
  // Return the MTLBuffer allocation to the pool once the command buffer is done with it

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -66,7 +66,7 @@ class MVKCommandEncodingPool : public MVKBaseObject {
  * To return the returned allocation back to the pool to be reused,
  * call the returnToPool() function on the returned allocation.
  */
- const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false);
+ const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
 
  /**
  * Returns a MTLRenderPipelineState dedicated to rendering to several attachments
@@ -153,6 +153,7 @@ class MVKCommandEncodingPool : public MVKBaseObject {
  std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
  std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
  MVKMTLBufferAllocator _mtlBufferAllocator;
+ MVKMTLBufferAllocator _privateMtlBufferAllocator;
  MVKMTLBufferAllocator _dedicatedMtlBufferAllocator;
  id<MTLDepthStencilState> _cmdClearDepthOnlyDepthStencilState = nil;
  id<MTLDepthStencilState> _cmdClearStencilOnlyDepthStencilState = nil;

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -77,10 +77,14 @@
  MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) {
+const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) {
+ MVKAssert(isPrivate || !isDedicated, "Dedicated, host-shared temporary buffers are not supported."); 
  if (isDedicated) {
  return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
  }
+ if (isPrivate) {
+ return _privateMtlBufferAllocator.acquireMTLBufferRegion(length);
+ }
  return _mtlBufferAllocator.acquireMTLBufferRegion(length);
 }
 
@@ -163,7 +167,8 @@ static inline uint32_t getClearStateIndex(MVKFormatType type) {
 
 MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
  _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
- _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) {
+ _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate),
+ _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) {
 }
 
 MVKCommandEncodingPool::~MVKCommandEncodingPool() {

diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
@@ -86,7 +86,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool<MVKMTLBufferAllocation>
  MVKMTLBufferAllocation* newObject() override;
 
  /** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */
- MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated);
+ MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated);
 
  ~MVKMTLBufferAllocationPool() override;
 
@@ -97,6 +97,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool<MVKMTLBufferAllocation>
  NSUInteger _nextOffset;
  NSUInteger _allocationLength;
  NSUInteger _mtlBufferLength;
+ MTLStorageMode _mtlStorageMode;
  MVKSmallVector<id<MTLBuffer>, 64> _mtlBuffers;
  MVKDevice* _device;
 };
@@ -137,7 +138,7 @@ class MVKMTLBufferAllocator : public MVKBaseDeviceObject {
  * next power-of-two value that is at least as big as the specified maximum size.
  * If makeThreadSafe is true, a lock will be applied when an allocation is acquired.
  */
- MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false);
+ MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false, MTLStorageMode mtlStorageMode = MTLStorageModeShared);
 
  ~MVKMTLBufferAllocator() override;
 

diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -44,17 +44,18 @@
 
 // Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it
 void MVKMTLBufferAllocationPool::addMTLBuffer() {
- MTLResourceOptions mbOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
+ MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache;
  _mtlBuffers.push_back([_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts]);
  _nextOffset = 0;
 }
 
 
-MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated)
+MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated)
  : MVKObjectPool<MVKMTLBufferAllocation>(true) {
  _device = device;
  _allocationLength = allocationLength;
  _mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount());
+ _mtlStorageMode = mtlStorageMode;
  _nextOffset = _mtlBufferLength; // Force a MTLBuffer to be added on first access
 }
 
@@ -89,7 +90,7 @@
  return region;
 }
 
-MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) {
+MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) {
  _maxAllocationLength = maxRegionLength;
  _makeThreadSafe = makeThreadSafe;
 
@@ -100,7 +101,7 @@
  _regionPools.reserve(maxP2Exp + 1);
  NSUInteger allocLen = 1;
  for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) {
- _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated));
+ _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, mtlStorageMode, isDedicated));
  allocLen <<= 1;
  }
 }