From 2290e86cd901fe17cad33a71b4a6b3284fedd728 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sun, 14 Apr 2024 22:10:34 -0400 Subject: [PATCH] Add support for VK_EXT_host_image_copy extension. - MVKResource::getHostMemoryAddress() return nullptr if MVKDeviceMemory::getHostMemoryAddress() returns null pointer, regardless of local offset. - Remove unnecessary enum value kMVKVkFormatFeatureFlagsTexTransfer to reduce redundancy between read and transfer feature flag options. - Fix spelling of mvkVkOffset3DFromMTLOrigin() (unrelated). - MVKResource remove unnecessary inline qualifiers (unrelated). - MVKDevice remove some obsolete commentary (unrelated). --- Docs/MoltenVK_Runtime_UserGuide.md | 1 + Docs/Whats_New.md | 2 + MoltenVK/MoltenVK/API/mvk_datatypes.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 121 +++++++--- .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + .../MoltenVK/GPUObjects/MVKDeviceMemory.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 17 ++ MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 208 +++++++++++++++++- MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 5 + .../MoltenVK/GPUObjects/MVKPixelFormats.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKResource.h | 19 +- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/vulkan.mm | 59 +++++ 14 files changed, 388 insertions(+), 57 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index cad4e76db..d75880312 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -315,6 +315,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_EXT_hdr_metadata` - *macOS only.* - `VK_EXT_headless_surface` +- `VK_EXT_host_image_copy` - `VK_EXT_host_query_reset` - `VK_EXT_image_robustness` - `VK_EXT_inline_uniform_block` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index c49dcc4ba..2d3f8eab7 100644 --- a/Docs/Whats_New.md +++ 
b/Docs/Whats_New.md @@ -18,6 +18,8 @@ MoltenVK 1.2.9 Released TBD +- Add support for extensions: + - `VK_EXT_host_image_copy` - To support legacy apps, restore `MoltenVK/dylib` directory via symlink to `MoltenVK/dynamic/dylib`. - Add `MVKPerformanceTracker::previous` to track latest-but-one performance measurements. - Fix crash when using `VK_EXT_metal_objects` under _ARC_. diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index 902dc0cba..9ef77ce7e 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -452,7 +452,7 @@ static inline MTLOrigin mvkMTLOriginFromVkOffset3D(VkOffset3D vkOffset) { } /** Returns a Vulkan VkOffset3D constructed from a Metal MTLOrigin. */ -static inline VkOffset3D mvkVkOffset3DFromMTLSize(MTLOrigin mtlOrigin) { +static inline VkOffset3D mvkVkOffset3DFromMTLOrigin(MTLOrigin mtlOrigin) { return { (int32_t)mtlOrigin.x, (int32_t)mtlOrigin.y, (int32_t)mtlOrigin.z }; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 0102fa1a9..f7062f15f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -417,6 +417,7 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { uint32_t getMoltenVKGitRevision(); void populateDeviceIDProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props); void populateSubgroupProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props); + void populateHostImageCopyProperties(VkPhysicalDeviceHostImageCopyPropertiesEXT* pHostImageCopyProps); void logGPUInfo(); id _mtlDevice; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index c17740010..9765f34ff 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -443,6 +443,11 @@ interlockFeatures->fragmentShaderShadingRateInterlock = false; // Requires variable rate shading; not supported yet 
in Metal break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT: { + auto* hostImageCopyFeatures = (VkPhysicalDeviceHostImageCopyFeaturesEXT*)next; + hostImageCopyFeatures->hostImageCopy = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: { auto* pipelineCreationCacheControlFeatures = (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT*)next; pipelineCreationCacheControlFeatures->pipelineCreationCacheControl = true; @@ -817,6 +822,10 @@ extMemHostProps->minImportedHostPointerAlignment = _metalFeatures.hostMemoryPageSize; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT: { + populateHostImageCopyProperties((VkPhysicalDeviceHostImageCopyPropertiesEXT*)next); + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { // This isn't implemented yet, but when it is, it is expected that we'll wind up doing it manually. auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next; @@ -845,6 +854,77 @@ } } +void MVKPhysicalDevice::populateHostImageCopyProperties(VkPhysicalDeviceHostImageCopyPropertiesEXT* pHostImageCopyProps) { + + // Metal lacks the concept of image layouts, and so does not restrict + // host copy transfers based on them. Assume all image layouts are supported. + // TODO: As extensions that add layouts are implemented, this list should be extended. 
+ VkImageLayout supportedImgLayouts[] = { + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + }; + uint32_t supportedImgLayoutsCnt = sizeof(supportedImgLayouts) / sizeof(VkImageLayout); + + // pCopySrcLayouts + // If pCopySrcLayouts is NULL, return the number of supported layouts. + if (pHostImageCopyProps->pCopySrcLayouts) { + mvkCopy(pHostImageCopyProps->pCopySrcLayouts, supportedImgLayouts, min(pHostImageCopyProps->copySrcLayoutCount, supportedImgLayoutsCnt)); + } else { + pHostImageCopyProps->copySrcLayoutCount = supportedImgLayoutsCnt; + } + + // pCopyDstLayouts + // If pCopyDstLayouts is NULL, return the number of supported layouts. + if (pHostImageCopyProps->pCopyDstLayouts) { + mvkCopy(pHostImageCopyProps->pCopyDstLayouts, supportedImgLayouts, min(pHostImageCopyProps->copyDstLayoutCount, supportedImgLayoutsCnt)); + } else { + pHostImageCopyProps->copyDstLayoutCount = supportedImgLayoutsCnt; + } + + // optimalTilingLayoutUUID + // Since optimalTilingLayoutUUID is an uint8_t array, use Big-Endian byte ordering, + // so a hex dump of the array is human readable in its parts. + uint8_t* uuid = pHostImageCopyProps->optimalTilingLayoutUUID; + size_t uuidComponentOffset = 0; + mvkClear(uuid, VK_UUID_SIZE); + + // First 4 bytes contains GPU vendor ID. 
+ // Use Big-Endian byte ordering, so a hex dump is human readable + *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(_properties.vendorID); + uuidComponentOffset += sizeof(uint32_t); + + // Next 4 bytes contains GPU device ID + // Use Big-Endian byte ordering, so a hex dump is human readable + *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(_properties.deviceID); + uuidComponentOffset += sizeof(uint32_t); + + // Next 4 bytes contains OS version + *(MVKOSVersion*)&uuid[uuidComponentOffset] = mvkOSVersion(); + uuidComponentOffset += sizeof(MVKOSVersion); + + // Last 4 bytes are left zero + + // identicalMemoryTypeRequirements + // Metal cannot use Private storage mode with host memory access. + pHostImageCopyProps->identicalMemoryTypeRequirements = false; +} + // Since these are uint8_t arrays, use Big-Endian byte ordering, // so a hex dump of the array is human readable in its parts. void MVKPhysicalDevice::populateDeviceIDProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props) { @@ -1177,6 +1257,15 @@ samplerYcbcrConvProps->combinedImageSamplerDescriptorCount = std::max(_pixelFormats.getChromaSubsamplingPlaneCount(pImageFormatInfo->format), (uint8_t)1u); break; } + case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: { + // Under Metal, VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT does not affect either memory layout + // or access, therefore, both identicalMemoryLayout and optimalDeviceAccess should be VK_TRUE. + // Also, per Vulkan spec, if identicalMemoryLayout is VK_TRUE, optimalDeviceAccess must also be VK_TRUE. 
+ auto* hostImgCopyPerfQry = (VkHostImageCopyDevicePerformanceQueryEXT*)nextProps; + hostImgCopyPerfQry->optimalDeviceAccess = VK_TRUE; + hostImgCopyPerfQry->identicalMemoryLayout = VK_TRUE; + break; + } default: break; } @@ -3068,38 +3157,6 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope _memoryProperties.memoryTypes[typeIndex].propertyFlags = propertyFlags; } -// Initializes the memory properties of this instance. -// Metal Shared: -// - applies to both buffers and textures -// - default mode for buffers on both iOS & macOS -// - default mode for textures on iOS -// - one copy of memory visible to both CPU & GPU -// - coherent at command buffer boundaries -// Metal Private: -// - applies to both buffers and textures -// - accessed only by GPU through render, compute, or BLIT operations -// - no access by CPU -// - always use for framebuffers and renderable textures -// Metal Managed: -// - applies to both buffers and textures -// - default mode for textures on macOS -// - two copies of each buffer or texture when discrete memory available -// - convenience of shared mode, performance of private mode -// - on unified systems behaves like shared memory and has only one copy of content -// - when writing, use: -// - buffer didModifyRange: -// - texture replaceRegion: -// - when reading, use: -// - encoder synchronizeResource: followed by -// - cmdbuff waitUntilCompleted (or completion handler) -// - buffer/texture getBytes: -// Metal Memoryless: -// - applies only to textures used as transient render targets -// - only available with TBDR devices (i.e. 
on iOS) -// - no device memory is reserved at all -// - storage comes from tile memory -// - contents are undefined after rendering -// - use for temporary renderable textures void MVKPhysicalDevice::initMemoryProperties() { mvkClear(&_memoryProperties); // Start with everything cleared diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index a8e0ca690..166efb38d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -69,6 +69,7 @@ MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3) MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, EXT, 31) MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3) +MVK_DEVICE_FEATURE_EXTN(HostImageCopy, HOST_IMAGE_COPY, EXT, 1) MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1) MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3) MVK_DEVICE_FEATURE_EXTN(ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, EXT, 12) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h index cf2ad3fd6..5449608a5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h @@ -69,8 +69,8 @@ class MVKDeviceMemory : public MVKVulkanAPIDeviceObject { inline VkDeviceSize getDeviceMemoryCommitment() { return _allocationSize; } /** - * Returns the host memory address of this memory, or NULL if the memory - * is marked as device-only and cannot be mapped to a host address. + * Returns the host memory address of this memory, or NULL if the memory has not been + * mapped yet, or is marked as device-only and cannot be mapped to a host address. 
*/ inline void* getHostMemoryAddress() { return _pMemory; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 7a032f6bd..62e809f75 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -226,6 +226,10 @@ class MVKImage : public MVKVulkanAPIDeviceObject { VkResult getSubresourceLayout(const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); + /** Populates the specified layout for the specified sub-resource. */ + VkResult getSubresourceLayout(const VkImageSubresource2KHR* pSubresource, + VkSubresourceLayout2KHR* pLayout); + /** Populates the specified transfer image descriptor data structure. */ void getTransferDescriptorData(MVKImageDescriptorData& imgData); @@ -252,6 +256,13 @@ class MVKImage : public MVKVulkanAPIDeviceObject { /** Flush underlying buffer memory into the image if necessary */ void flushToDevice(VkDeviceSize offset, VkDeviceSize size); + /** Host-copy the content of this image to or from memory using the CPU. */ + template VkResult copyContent(const CopyInfo* pCopyInfo); + + /** Host-copy the content of one image to another using the CPU. */ + static VkResult copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo); + + #pragma mark Metal /** Returns the Metal texture underlying this image. 
*/ @@ -348,6 +359,12 @@ class MVKImage : public MVKVulkanAPIDeviceObject { uint8_t getMemoryBindingCount() const { return (uint8_t)_memoryBindings.size(); } uint8_t getMemoryBindingIndex(uint8_t planeIndex) const; MVKImageMemoryBinding* getMemoryBinding(uint8_t planeIndex); + VkResult copyContent(id mtlTex, + VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, + void* pImgBytes, size_t rowPitch, size_t depthPitch); + VkResult copyContent(id mtlTex, + VkImageToMemoryCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, + void* pImgBytes, size_t rowPitch, size_t depthPitch); MVKSmallVector _memoryBindings; MVKSmallVector _planes; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index a02417986..488dc64c0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -191,7 +191,7 @@ // Returns a pointer to the internal subresource for the specified MIP level layer. MVKImageSubresource* MVKImagePlane::getSubresource(uint32_t mipLevel, uint32_t arrayLayer) { uint32_t srIdx = (mipLevel * _image->_arrayLayers) + arrayLayer; - return (srIdx < _subresources.size()) ? &_subresources[srIdx] : NULL; + return (srIdx < _subresources.size()) ? &_subresources[srIdx] : nullptr; } // Updates the contents of the underlying MTLTexture, corresponding to the @@ -219,8 +219,8 @@ #if MVK_MACOS std::unique_ptr decompBuffer; if (_image->_is3DCompressed) { - // We cannot upload the texture data directly in this case. But we - // can upload the decompressed image data. + // We cannot upload the texture data directly in this case. + // But we can upload the decompressed image data. 
std::unique_ptr codec = mvkCreateCodec(_image->getVkFormat()); if (!codec) { _image->reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "A 3D texture used a compressed format that MoltenVK does not yet support."); @@ -561,6 +561,161 @@ } } +template +static MTLRegion getMTLRegion(const ImgRgn& imgRgn) { + return { mvkMTLOriginFromVkOffset3D(imgRgn.imageOffset), mvkMTLSizeFromVkExtent3D(imgRgn.imageExtent) }; +} + +// Host-copy from a MTLTexture to memory. +VkResult MVKImage::copyContent(id mtlTex, + VkImageToMemoryCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, + void* pImgBytes, size_t rowPitch, size_t depthPitch) { + [mtlTex getBytes: pImgBytes + bytesPerRow: rowPitch + bytesPerImage: depthPitch + fromRegion: getMTLRegion(imgRgn) + mipmapLevel: mipLevel + slice: slice]; + return VK_SUCCESS; +} + +// Host-copy from memory to a MTLTexture. +VkResult MVKImage::copyContent(id mtlTex, + VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice, + void* pImgBytes, size_t rowPitch, size_t depthPitch) { + VkSubresourceLayout imgLayout = { 0, 0, rowPitch, 0, depthPitch}; +#if MVK_MACOS + // Compressed content cannot be directly uploaded to a compressed 3D texture. + // But we can upload the decompressed image data.
+ std::unique_ptr decompBuffer; + if (_is3DCompressed) { + std::unique_ptr codec = mvkCreateCodec(getPixelFormats()->getVkFormat(mtlTex.pixelFormat)); + if ( !codec ) { return reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "A 3D texture used a compressed format that MoltenVK does not yet support."); } + VkSubresourceLayout linearLayout = {}; + linearLayout.rowPitch = 4 * imgRgn.imageExtent.width; + linearLayout.depthPitch = linearLayout.rowPitch * imgRgn.imageExtent.height; + linearLayout.size = linearLayout.depthPitch * imgRgn.imageExtent.depth; + decompBuffer = std::unique_ptr(new char[linearLayout.size]); + codec->decompress(decompBuffer.get(), pImgBytes, linearLayout, imgLayout, imgRgn.imageExtent); + pImgBytes = decompBuffer.get(); + imgLayout = linearLayout; + } +#endif + [mtlTex replaceRegion: getMTLRegion(imgRgn) + mipmapLevel: mipLevel + slice: slice + withBytes: pImgBytes + bytesPerRow: imgLayout.rowPitch + bytesPerImage: imgLayout.depthPitch]; + return VK_SUCCESS; +} + +template +VkResult MVKImage::copyContent(const CopyInfo* pCopyInfo) { + MVKPixelFormats* pixFmts = getPixelFormats(); + VkImageType imgType = getImageType(); + bool is1D = imgType == VK_IMAGE_TYPE_1D; + bool is3D = imgType == VK_IMAGE_TYPE_3D; + + for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyInfo->regionCount; imgRgnIdx++) { + auto& imgRgn = pCopyInfo->pRegions[imgRgnIdx]; + auto& imgSubRez = imgRgn.imageSubresource; + + id mtlTex = getMTLTexture(getPlaneFromVkImageAspectFlags(imgSubRez.aspectMask)); + MTLPixelFormat mtlPixFmt = mtlTex.pixelFormat; + bool isPVRTC = pixFmts->isPVRTCFormat(mtlPixFmt); + + uint32_t texelsWidth = imgRgn.memoryRowLength ? imgRgn.memoryRowLength : imgRgn.imageExtent.width; + uint32_t texelsHeight = imgRgn.memoryImageHeight ? 
imgRgn.memoryImageHeight : imgRgn.imageExtent.height; + uint32_t texelsDepth = imgRgn.imageExtent.depth; + size_t rowPitch = pixFmts->getBytesPerRow(mtlPixFmt, texelsWidth); + size_t depthPitch = pixFmts->getBytesPerLayer(mtlPixFmt, rowPitch, texelsHeight); + size_t arrayPitch = depthPitch * texelsDepth; + + for (uint32_t imgLyrIdx = 0; imgLyrIdx < imgSubRez.layerCount; imgLyrIdx++) { + VkResult rslt = copyContent(mtlTex, + imgRgn, + imgSubRez.mipLevel, + imgSubRez.baseArrayLayer + imgLyrIdx, + (void*)((uintptr_t)imgRgn.pHostPointer + (arrayPitch * imgLyrIdx)), + (isPVRTC || is1D) ? 0 : rowPitch, + (isPVRTC || !is3D) ? 0 : depthPitch); + if (rslt) { return rslt; } + } + } + return VK_SUCCESS; +} + +// Create concrete implementations of the variations of the copyContent() template function. +// This is required since the template is called from outside this file (compilation unit). +template VkResult MVKImage::copyContent(const VkCopyMemoryToImageInfoEXT* pCopyInfo); +template VkResult MVKImage::copyContent(const VkCopyImageToMemoryInfoEXT* pCopyInfo); + +// Host-copy content between images by allocating a temporary memory buffer, copying into it from the +// source image, and then copying from the memory buffer into the destination image, all using the CPU. +VkResult MVKImage::copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) { + for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyImageToImageInfo->regionCount; imgRgnIdx++) { + auto& imgRgn = pCopyImageToImageInfo->pRegions[imgRgnIdx]; + + // Create a temporary memory buffer to copy the image region content. 
+ MVKImage* srcMVKImg = (MVKImage*)pCopyImageToImageInfo->srcImage; + MVKPixelFormats* pixFmts = srcMVKImg->getPixelFormats(); + MTLPixelFormat srcMTLPixFmt = srcMVKImg->getMTLPixelFormat(getPlaneFromVkImageAspectFlags(imgRgn.srcSubresource.aspectMask)); + size_t rowPitch = pixFmts->getBytesPerRow(srcMTLPixFmt, imgRgn.extent.width); + size_t depthPitch = pixFmts->getBytesPerLayer(srcMTLPixFmt, rowPitch, imgRgn.extent.height); + size_t arrayPitch = depthPitch * imgRgn.extent.depth; + size_t rgnSizeInBytes = arrayPitch * imgRgn.srcSubresource.layerCount; + auto xfrBuffer = unique_ptr(new char[rgnSizeInBytes]); + void* pImgBytes = xfrBuffer.get(); + + // Host-copy the source image content into the memory buffer using the CPU. + VkImageToMemoryCopyEXT srcCopy = { + VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY_EXT, + nullptr, + pImgBytes, + 0, + 0, + imgRgn.srcSubresource, + imgRgn.srcOffset, + imgRgn.extent + }; + VkCopyImageToMemoryInfoEXT srcCopyInfo = { + VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT, + nullptr, + pCopyImageToImageInfo->flags, + pCopyImageToImageInfo->srcImage, + pCopyImageToImageInfo->srcImageLayout, + 1, + &srcCopy + }; + srcMVKImg->copyContent(&srcCopyInfo); + + // Host-copy the image content from the memory buffer into the destination image using the CPU. 
+ MVKImage* dstMVKImg = (MVKImage*)pCopyImageToImageInfo->dstImage; + VkMemoryToImageCopyEXT dstCopy = { + VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, + nullptr, + pImgBytes, + 0, + 0, + imgRgn.dstSubresource, + imgRgn.dstOffset, + imgRgn.extent + }; + VkCopyMemoryToImageInfoEXT dstCopyInfo = { + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT, + nullptr, + pCopyImageToImageInfo->flags, + pCopyImageToImageInfo->dstImage, + pCopyImageToImageInfo->dstImageLayout, + 1, + &dstCopy + }; + dstMVKImg->copyContent(&dstCopyInfo); + } + return VK_SUCCESS; +} + VkImageType MVKImage::getImageType() { return mvkVkImageTypeFromMTLTextureType(_mtlTextureType); } bool MVKImage::getIsDepthStencil() { return getPixelFormats()->getFormatType(_vkFormat) == kMVKFormatDepthStencil; } @@ -591,12 +746,37 @@ VkResult MVKImage::getSubresourceLayout(const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout) { - uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(pSubresource->aspectMask); - MVKImageSubresource* pImgRez = _planes[planeIndex]->getSubresource(pSubresource->mipLevel, pSubresource->arrayLayer); - if ( !pImgRez ) { return VK_INCOMPLETE; } + VkImageSubresource2KHR subresource2 = { VK_STRUCTURE_TYPE_IMAGE_SUBRESOURCE_2_KHR, nullptr, *pSubresource}; + VkSubresourceLayout2KHR layout2 = { VK_STRUCTURE_TYPE_SUBRESOURCE_LAYOUT_2_KHR, nullptr, *pLayout}; + VkResult rslt = getSubresourceLayout(&subresource2, &layout2); + *pLayout = layout2.subresourceLayout; + return rslt; +} + +VkResult MVKImage::getSubresourceLayout(const VkImageSubresource2KHR* pSubresource, + VkSubresourceLayout2KHR* pLayout) { + pLayout->sType = VK_STRUCTURE_TYPE_SUBRESOURCE_LAYOUT_2_KHR; + VkSubresourceHostMemcpySizeEXT* pMemcpySize = nullptr; + for (auto* next = (VkBaseOutStructure*)pLayout->pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_SUBRESOURCE_HOST_MEMCPY_SIZE_EXT: { + pMemcpySize = (VkSubresourceHostMemcpySizeEXT*)next; + break; + } + default: + 
break; + } + } - *pLayout = pImgRez->layout; - return VK_SUCCESS; + uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(pSubresource->imageSubresource.aspectMask); + MVKImageSubresource* pImgRez = _planes[planeIndex]->getSubresource(pSubresource->imageSubresource.mipLevel, + pSubresource->imageSubresource.arrayLayer); + if ( !pImgRez ) { return VK_INCOMPLETE; } + + pLayout->subresourceLayout = pImgRez->layout; + if (pMemcpySize) { pMemcpySize->size = pImgRez->layout.size; } + + return VK_SUCCESS; } void MVKImage::getTransferDescriptorData(MVKImageDescriptorData& imgData) { @@ -652,11 +832,19 @@ mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getHostCoherentMemoryTypes()); } #endif + + VkImageUsageFlags combinedUsage = getCombinedUsage(); + + // If the image can be used in a host-copy transfer, the memory cannot be private. + if (mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)) { + mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getPrivateMemoryTypes()); + } + // Only transient attachments may use memoryless storage. // Using memoryless as an input attachment requires shader framebuffer fetch, which MoltenVK does not support yet. 
// TODO: support framebuffer fetch so VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT uses color(m) in shader instead of setFragmentTexture:, which crashes Metal - if (!mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) || - mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) ) { + if (!mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) || + mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) ) { mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getLazilyAllocatedMemoryTypes()); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 7ec63a9f7..6c4d71d06 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -768,6 +768,11 @@ ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleLocationsEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetTessellationDomainOriginEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCopyImageToImageEXT, EXT_HOST_IMAGE_COPY); + ADD_DVC_EXT_ENTRY_POINT(vkCopyImageToMemoryEXT, EXT_HOST_IMAGE_COPY); + ADD_DVC_EXT_ENTRY_POINT(vkCopyMemoryToImageEXT, EXT_HOST_IMAGE_COPY); + ADD_DVC_EXT_ENTRY_POINT(vkGetImageSubresourceLayout2EXT, EXT_HOST_IMAGE_COPY); + ADD_DVC_EXT_ENTRY_POINT(vkTransitionImageLayoutEXT, EXT_HOST_IMAGE_COPY); } void MVKInstance::logVersions() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 6ba7f3604..02a1461da 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -2061,6 +2061,7 @@ kMVKVkFormatFeatureFlagsTexRead = (VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | + 
VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT | VK_FORMAT_FEATURE_2_BLIT_SRC_BIT), kMVKVkFormatFeatureFlagsTexFilter = (VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT), kMVKVkFormatFeatureFlagsTexWrite = (VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | @@ -2072,8 +2073,6 @@ kMVKVkFormatFeatureFlagsTexDSAtt = (VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_BLIT_DST_BIT), kMVKVkFormatFeatureFlagsTexBlend = (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT), - kMVKVkFormatFeatureFlagsTexTransfer = (VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT), kMVKVkFormatFeatureFlagsTexChromaSubsampling = (VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT_KHR | VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT_KHR), kMVKVkFormatFeatureFlagsTexMultiPlanar = (VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT_KHR | @@ -2106,7 +2105,6 @@ if (chromaSubsamplingComponentBits > 0) { if (mtlPixFmtCaps != 0 || chromaSubsamplingPlaneCount > 1) { mtlPixFmtCaps = kMVKMTLFmtCapsRF; - vkProps.optimalTilingFeatures = kMVKVkFormatFeatureFlagsTexTransfer; } enableFormatFeatures(ChromaSubsampling, Tex, mtlPixFmtCaps, vkProps.optimalTilingFeatures); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h index 921f7afbf..5e1fd1a1a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h @@ -34,29 +34,30 @@ class MVKResource : public MVKVulkanAPIDeviceObject { public: /** Returns the number of bytes required for the entire resource. */ - inline VkDeviceSize getByteCount() { return _byteCount; } + VkDeviceSize getByteCount() { return _byteCount; } /** Returns the byte offset in the bound device memory. */ - inline VkDeviceSize getDeviceMemoryOffset() { return _deviceMemoryOffset; } + VkDeviceSize getDeviceMemoryOffset() { return _deviceMemoryOffset; } /** Binds this resource to the specified offset within the specified memory allocation. 
*/ virtual VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset); /** Returns the device memory underlying this resource. */ - inline MVKDeviceMemory* getDeviceMemory() { return _deviceMemory; } + MVKDeviceMemory* getDeviceMemory() { return _deviceMemory; } /** Returns whether the memory is accessible from the host. */ - inline bool isMemoryHostAccessible() { return _deviceMemory && _deviceMemory->isMemoryHostAccessible(); } + bool isMemoryHostAccessible() { return _deviceMemory && _deviceMemory->isMemoryHostAccessible(); } /** Returns whether the memory is automatically coherent between device and host. */ - inline bool isMemoryHostCoherent() { return _deviceMemory && _deviceMemory->isMemoryHostCoherent(); } + bool isMemoryHostCoherent() { return _deviceMemory && _deviceMemory->isMemoryHostCoherent(); } /** - * Returns the host memory address of this resource, or NULL if the memory - * is marked as device-only and cannot be mapped to a host address. + * Returns the host memory address of this resource, or NULL if the memory is not mapped to a + * host address yet, or if the memory is marked as device-only and cannot be mapped to a host address. */ - inline void* getHostMemoryAddress() { - return (_deviceMemory ? (void*)((uintptr_t)_deviceMemory->getHostMemoryAddress() + _deviceMemoryOffset) : nullptr); + void* getHostMemoryAddress() { + void* devMemHostAddr = _deviceMemory ? _deviceMemory->getHostMemoryAddress() : nullptr; + return devMemHostAddr ? (void*)((uintptr_t)devMemHostAddr + _deviceMemoryOffset) : nullptr; } /** Applies the specified global memory barrier. 
*/ diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index bbe08cd7b..5eea40932 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -114,6 +114,7 @@ MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0) MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA) MVK_EXTENSION(EXT_headless_surface, EXT_HEADLESS_SURFACE, INSTANCE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_host_image_copy, EXT_HOST_IMAGE_COPY, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_image_robustness, EXT_IMAGE_ROBUSTNESS, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_inline_uniform_block, EXT_INLINE_UNIFORM_BLOCK, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index e28fe7f4c..d2728a18a 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -3901,6 +3901,65 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreateHeadlessSurfaceEXT( } +#pragma mark - +#pragma mark VK_EXT_host_image_copy extension + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToImageEXT( + VkDevice device, + const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) { + + MVKTraceVulkanCallStart(); + VkResult rslt = MVKImage::copyContent(pCopyImageToImageInfo); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToMemoryEXT( + VkDevice device, + const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo) { + + MVKTraceVulkanCallStart(); + MVKImage* srcImg = (MVKImage*)pCopyImageToMemoryInfo->srcImage; + VkResult rslt = srcImg->copyContent(pCopyImageToMemoryInfo); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyMemoryToImageEXT( + VkDevice device, + const 
VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo) { + + MVKTraceVulkanCallStart(); + MVKImage* dstImg = (MVKImage*)pCopyMemoryToImageInfo->dstImage; + VkResult rslt = dstImg->copyContent(pCopyMemoryToImageInfo); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkGetImageSubresourceLayout2EXT( + VkDevice device, + VkImage image, + const VkImageSubresource2KHR* pSubresource, + VkSubresourceLayout2KHR* pLayout) { + + MVKTraceVulkanCallStart(); + MVKImage* mvkImg = (MVKImage*)image; + mvkImg->getSubresourceLayout(pSubresource, pLayout); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkTransitionImageLayoutEXT( + VkDevice device, + uint32_t transitionCount, + const VkHostImageLayoutTransitionInfoEXT* pTransitions) { + + MVKTraceVulkanCallStart(); + // Metal lacks the concept of image layouts, so nothing to do. + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + + #pragma mark - #pragma mark VK_EXT_host_query_reset extension