From b6735878f30f7426610d92cb1a8bccd81cda2793 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 23 Apr 2024 14:51:31 -0400 Subject: [PATCH] VK_EXT_host_image_copy: On discrete GPUs, sync managed-memory textures before copying. Discrete GPUs use managed-memory textures, and these need to be synchronized from GPU memory before being available for host-copying to memory using the CPU. Metal automatically handles the reverse sync when copying from memory to a texture. --- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 12 +++-- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 60 +++++++++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 1 + MoltenVK/MoltenVK/Utility/MVKFoundation.cpp | 1 + MoltenVK/MoltenVK/Utility/MVKFoundation.h | 1 + MoltenVK/MoltenVK/Vulkan/vulkan.mm | 6 +-- 7 files changed, 60 insertions(+), 22 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 62e809f75..3a416436d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -256,11 +256,14 @@ class MVKImage : public MVKVulkanAPIDeviceObject { /** Flush underlying buffer memory into the image if necessary */ void flushToDevice(VkDeviceSize offset, VkDeviceSize size); - /** Host-copy the content of this image to or from memory using the CPU. */ - template<typename CopyInfo> VkResult copyContent(const CopyInfo* pCopyInfo); + /** Host-copy the content of an image to another using the CPU. */ + static VkResult copyImageToImage(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo); + + /** Host-copy the content of an image to memory using the CPU. */ + VkResult copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo); - /** Host-copy the content of one image to another using the CPU. */ - static VkResult copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo); + /** Host-copy the content of an image from memory using the CPU. 
*/ + VkResult copyMemoryToImage(const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo); #pragma mark Metal @@ -359,6 +362,7 @@ class MVKImage : public MVKVulkanAPIDeviceObject { uint8_t getMemoryBindingCount() const { return (uint8_t)_memoryBindings.size(); } uint8_t getMemoryBindingIndex(uint8_t planeIndex) const; MVKImageMemoryBinding* getMemoryBinding(uint8_t planeIndex); + template<typename CopyInfo> VkResult copyContent(const CopyInfo* pCopyInfo); VkResult copyContent(id<MTLTexture> mtlTex, VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, void* pImgBytes, size_t rowPitch, size_t depthPitch); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 488dc64c0..41ce39506 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -566,7 +566,7 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { return { mvkMTLOriginFromVkOffset3D(imgRgn.imageOffset), mvkMTLSizeFromVkExtent3D(imgRgn.imageExtent) }; } -// Host-copy from memory to a MTLTexture. +// Host-copy from a MTLTexture to memory. VkResult MVKImage::copyContent(id<MTLTexture> mtlTex, VkImageToMemoryCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, void* pImgBytes, size_t rowPitch, size_t depthPitch) { @@ -579,7 +579,7 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { return VK_SUCCESS; } -// Host-copy from a MTLTexture to memory. +// Host-copy from memory to a MTLTexture. VkResult MVKImage::copyContent(id<MTLTexture> mtlTex, VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, void* pImgBytes, size_t rowPitch, size_t depthPitch) { @@ -646,14 +646,9 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { return VK_SUCCESS; } -// Create concrete implementations of the variations of the copyContent() template function. -// This is required since the template is called from outside this file (compilation unit). 
-template VkResult MVKImage::copyContent(const VkCopyMemoryToImageInfoEXT* pCopyInfo); -template VkResult MVKImage::copyContent(const VkCopyImageToMemoryInfoEXT* pCopyInfo); - // Host-copy content between images by allocating a temporary memory buffer, copying into it from the // source image, and then copying from the memory buffer into the destination image, all using the CPU. -VkResult MVKImage::copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) { +VkResult MVKImage::copyImageToImage(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) { for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyImageToImageInfo->regionCount; imgRgnIdx++) { auto& imgRgn = pCopyImageToImageInfo->pRegions[imgRgnIdx]; @@ -716,6 +711,40 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { return VK_SUCCESS; } +VkResult MVKImage::copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo) { +#if MVK_MACOS + // On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need + // to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU. 
+ if ( !getPhysicalDevice()->getHasUnifiedMemory() && getMTLStorageMode() == MTLStorageModeManaged ) { + @autoreleasepool { + id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory); + id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder]; + + for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyImageToMemoryInfo->regionCount; imgRgnIdx++) { + auto& imgRgn = pCopyImageToMemoryInfo->pRegions[imgRgnIdx]; + auto& imgSubRez = imgRgn.imageSubresource; + id<MTLTexture> mtlTex = getMTLTexture(getPlaneFromVkImageAspectFlags(imgSubRez.aspectMask)); + for (uint32_t imgLyrIdx = 0; imgLyrIdx < imgSubRez.layerCount; imgLyrIdx++) { + [mtlBlitEnc synchronizeTexture: mtlTex + slice: imgSubRez.baseArrayLayer + imgLyrIdx + level: imgSubRez.mipLevel]; + } + } + + [mtlBlitEnc endEncoding]; + [mtlCmdBuff commit]; + [mtlCmdBuff waitUntilCompleted]; + } + } +#endif + + return copyContent(pCopyImageToMemoryInfo); +} + +VkResult MVKImage::copyMemoryToImage(const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo) { + return copyContent(pCopyMemoryToImageInfo); +} + VkImageType MVKImage::getImageType() { return mvkVkImageTypeFromMTLTextureType(_mtlTextureType); } bool MVKImage::getIsDepthStencil() { return getPixelFormats()->getFormatType(_vkFormat) == kMVKFormatDepthStencil; } @@ -823,21 +852,22 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { } VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements, uint8_t planeIndex) { + MVKPhysicalDevice* mvkPD = getPhysicalDevice(); + VkImageUsageFlags combinedUsage = getCombinedUsage(); + pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment) - ? getPhysicalDevice()->getPrivateMemoryTypes() - : getPhysicalDevice()->getAllMemoryTypes(); + ? 
mvkPD->getPrivateMemoryTypes() + : mvkPD->getAllMemoryTypes(); #if MVK_MACOS // Metal on macOS does not provide native support for host-coherent memory, but Vulkan requires it for Linear images if ( !_isLinear ) { - mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getHostCoherentMemoryTypes()); + mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes()); } #endif - VkImageUsageFlags combinedUsage = getCombinedUsage(); - // If the image can be used in a host-copy transfer, the memory cannot be private. if (mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)) { - mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getPrivateMemoryTypes()); + mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getPrivateMemoryTypes()); } // Only transient attachments may use memoryless storage. @@ -845,7 +875,7 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { // TODO: support framebuffer fetch so VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT uses color(m) in shader instead of setFragmentTexture:, which crashes Metal if (!mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) || mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) ) { - mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getLazilyAllocatedMemoryTypes()); + mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getLazilyAllocatedMemoryTypes()); } return getMemoryBinding(planeIndex)->getMemoryRequirements(pMemoryRequirements); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 752fcf80a..332546347 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -155,6 +155,7 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking NSString* _mtlCmdBuffLabelQueueWaitIdle = nil; NSString* _mtlCmdBuffLabelAcquireNextImage = nil; NSString* 
_mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; + NSString* _mtlCmdBuffLabelCopyImageToMemory = nil; MVKGPUCaptureScope* _submissionCaptureScope = nil; float _priority; uint32_t _index; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 38b50b162..db424d2e4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -195,6 +195,7 @@ CASE_GET_LABEL(DeviceWaitIdle); CASE_GET_LABEL(AcquireNextImage); CASE_GET_LABEL(InvalidateMappedMemoryRanges); + CASE_GET_LABEL(CopyImageToMemory); default: MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse)); return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index]; diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp index 45490a65d..10706b4dd 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp @@ -40,6 +40,7 @@ const char* mvkVkCommandName(MVKCommandUse cmdUse) { case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)"; case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)"; case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)"; + case kMVKCommandUseCopyImageToMemory: return "vkCopyImageToMemory host sync"; case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer"; case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage"; case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer"; diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index af7bf71c7..5844c9b7e 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -83,6 +83,7 @@ typedef enum : uint8_t { kMVKCommandUseResolveImage, /**< vkCmdResolveImage - resolve stage. 
*/ kMVKCommandUseResolveExpandImage, /**< vkCmdResolveImage - expand stage. */ kMVKCommandUseResolveCopyImage, /**< vkCmdResolveImage - copy stage. */ + kMVKCommandUseCopyImageToMemory, /**< vkCopyImageToMemoryEXT host sync. */ kMVKCommandUseCopyBuffer, /**< vkCmdCopyBuffer. */ kMVKCommandUseCopyBufferToImage, /**< vkCmdCopyBufferToImage. */ kMVKCommandUseCopyImageToBuffer, /**< vkCmdCopyImageToBuffer. */ diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index d2728a18a..107a13163 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -3909,7 +3909,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToImageEXT( const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) { MVKTraceVulkanCallStart(); - VkResult rslt = MVKImage::copyContent(pCopyImageToImageInfo); + VkResult rslt = MVKImage::copyImageToImage(pCopyImageToImageInfo); MVKTraceVulkanCallEnd(); return rslt; } @@ -3920,7 +3920,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToMemoryEXT( MVKTraceVulkanCallStart(); MVKImage* srcImg = (MVKImage*)pCopyImageToMemoryInfo->srcImage; - VkResult rslt = srcImg->copyContent(pCopyImageToMemoryInfo); + VkResult rslt = srcImg->copyImageToMemory(pCopyImageToMemoryInfo); MVKTraceVulkanCallEnd(); return rslt; } @@ -3931,7 +3931,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyMemoryToImageEXT( MVKTraceVulkanCallStart(); MVKImage* dstImg = (MVKImage*)pCopyMemoryToImageInfo->dstImage; - VkResult rslt = dstImg->copyContent(pCopyMemoryToImageInfo); + VkResult rslt = dstImg->copyMemoryToImage(pCopyMemoryToImageInfo); MVKTraceVulkanCallEnd(); return rslt; }