Skip to content

Commit

Permalink
Merge pull request #1275 from cdavis5e/private-temp-buffers
Browse files Browse the repository at this point in the history
MVKMTLBufferAllocation: Support private temp buffers.
  • Loading branch information
billhollings committed Feb 22, 2021
2 parents 3058a13 + c225c42 commit d2ddba1
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 36 deletions.
46 changes: 23 additions & 23 deletions MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
Expand Down Expand Up @@ -171,18 +171,18 @@
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
Expand Down Expand Up @@ -340,7 +340,7 @@
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
Expand Down Expand Up @@ -373,18 +373,18 @@
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
Expand Down Expand Up @@ -554,21 +554,21 @@
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);

vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
Expand All @@ -580,7 +580,7 @@
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
}
Expand Down Expand Up @@ -869,22 +869,22 @@
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length);
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true);

id<MTLComputePipelineState> vtxState;
vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
Expand All @@ -899,7 +899,7 @@
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
}
Expand Down
2 changes: 1 addition & 1 deletion MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
void setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex);

/** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */
const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false);
const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool isPrivate = false, bool isDedicated = false);

/** Returns the command encoding pool. */
MVKCommandEncodingPool* getCommandEncodingPool();
Expand Down
6 changes: 3 additions & 3 deletions MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@
getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
if (!_visibilityResultMTLBuffer) {
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true);
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
}
mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
}
Expand Down Expand Up @@ -647,8 +647,8 @@
}
}

const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) {
const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated);
const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isPrivate, bool isDedicated) {
const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated);
MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool();

// Return the MTLBuffer allocation to the pool once the command buffer is done with it
Expand Down
3 changes: 2 additions & 1 deletion MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class MVKCommandEncodingPool : public MVKBaseObject {
* To return the returned allocation back to the pool to be reused,
* call the returnToPool() function on the returned allocation.
*/
const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false);
const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false);

/**
* Returns a MTLRenderPipelineState dedicated to rendering to several attachments
Expand Down Expand Up @@ -153,6 +153,7 @@ class MVKCommandEncodingPool : public MVKBaseObject {
std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
MVKMTLBufferAllocator _mtlBufferAllocator;
MVKMTLBufferAllocator _privateMtlBufferAllocator;
MVKMTLBufferAllocator _dedicatedMtlBufferAllocator;
id<MTLDepthStencilState> _cmdClearDepthOnlyDepthStencilState = nil;
id<MTLDepthStencilState> _cmdClearStencilOnlyDepthStencilState = nil;
Expand Down
9 changes: 7 additions & 2 deletions MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,14 @@
MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
}

const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) {
const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) {
MVKAssert(isPrivate || !isDedicated, "Dedicated, host-shared temporary buffers are not supported.");
if (isDedicated) {
return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
}
if (isPrivate) {
return _privateMtlBufferAllocator.acquireMTLBufferRegion(length);
}
return _mtlBufferAllocator.acquireMTLBufferRegion(length);
}

Expand Down Expand Up @@ -163,7 +167,8 @@ static inline uint32_t getClearStateIndex(MVKFormatType type) {

MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
_mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
_dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) {
_privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate),
_dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) {
}

MVKCommandEncodingPool::~MVKCommandEncodingPool() {
Expand Down
5 changes: 3 additions & 2 deletions MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool<MVKMTLBufferAllocation>
MVKMTLBufferAllocation* newObject() override;

/** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */
MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated);
MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated);

~MVKMTLBufferAllocationPool() override;

Expand All @@ -97,6 +97,7 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool<MVKMTLBufferAllocation>
NSUInteger _nextOffset;
NSUInteger _allocationLength;
NSUInteger _mtlBufferLength;
MTLStorageMode _mtlStorageMode;
MVKSmallVector<id<MTLBuffer>, 64> _mtlBuffers;
MVKDevice* _device;
};
Expand Down Expand Up @@ -137,7 +138,7 @@ class MVKMTLBufferAllocator : public MVKBaseDeviceObject {
* next power-of-two value that is at least as big as the specified maximum size.
* If makeThreadSafe is true, a lock will be applied when an allocation is acquired.
*/
MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false);
MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false, MTLStorageMode mtlStorageMode = MTLStorageModeShared);

~MVKMTLBufferAllocator() override;

Expand Down
9 changes: 5 additions & 4 deletions MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,18 @@

// Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it
void MVKMTLBufferAllocationPool::addMTLBuffer() {
MTLResourceOptions mbOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache;
_mtlBuffers.push_back([_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts]);
_nextOffset = 0;
}


MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated)
MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated)
: MVKObjectPool<MVKMTLBufferAllocation>(true) {
_device = device;
_allocationLength = allocationLength;
_mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount());
_mtlStorageMode = mtlStorageMode;
_nextOffset = _mtlBufferLength; // Force a MTLBuffer to be added on first access
}

Expand Down Expand Up @@ -89,7 +90,7 @@
return region;
}

MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) {
MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) {
_maxAllocationLength = maxRegionLength;
_makeThreadSafe = makeThreadSafe;

Expand All @@ -100,7 +101,7 @@
_regionPools.reserve(maxP2Exp + 1);
NSUInteger allocLen = 1;
for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) {
_regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated));
_regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, mtlStorageMode, isDedicated));
allocLen <<= 1;
}
}
Expand Down

0 comments on commit d2ddba1

Please sign in to comment.