Skip to content

Commit

Permalink
Add support for VK_EXT_host_image_copy extension.
Browse files Browse the repository at this point in the history
- MVKResource::getHostMemoryAddress() return nullptr if
  MVKDeviceMemory::getHostMemoryAddress() returns null pointer,
  regardless of local offset.
- Remove unnecessary enum value kMVKVkFormatFeatureFlagsTexTransfer
  to reduce redundancy between read and transfer feature flag options.
- Fix spelling of mvkVkOffset3DFromMTLOrigin() (unrelated).
- MVKResource remove unnecessary inline qualifiers (unrelated).
- MVKDevice remove some obsolete commentary (unrelated).
  • Loading branch information
billhollings committed Apr 16, 2024
1 parent 3f6a3c2 commit 2290e86
Show file tree
Hide file tree
Showing 14 changed files with 388 additions and 57 deletions.
1 change: 1 addition & 0 deletions Docs/MoltenVK_Runtime_UserGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
- `VK_EXT_hdr_metadata`
- *macOS only.*
- `VK_EXT_headless_surface`
- `VK_EXT_host_image_copy`
- `VK_EXT_host_query_reset`
- `VK_EXT_image_robustness`
- `VK_EXT_inline_uniform_block`
Expand Down
2 changes: 2 additions & 0 deletions Docs/Whats_New.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ MoltenVK 1.2.9

Released TBD

- Add support for extensions:
- `VK_EXT_host_image_copy`
- To support legacy apps, restore `MoltenVK/dylib` directory via symlink to `MoltenVK/dynamic/dylib`.
- Add `MVKPerformanceTracker::previous` to track latest-but-one performance measurements.
- Fix crash when using `VK_EXT_metal_objects` under _ARC_.
Expand Down
2 changes: 1 addition & 1 deletion MoltenVK/MoltenVK/API/mvk_datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ static inline MTLOrigin mvkMTLOriginFromVkOffset3D(VkOffset3D vkOffset) {
}

/** Returns a Vulkan VkOffset3D constructed from a Metal MTLOrigin. */
static inline VkOffset3D mvkVkOffset3DFromMTLSize(MTLOrigin mtlOrigin) {
static inline VkOffset3D mvkVkOffset3DFromMTLOrigin(MTLOrigin mtlOrigin) {
	// Narrow each Metal origin component to the signed 32-bit
	// component type used by the Vulkan offset structure.
	VkOffset3D vkOffset;
	vkOffset.x = (int32_t)mtlOrigin.x;
	vkOffset.y = (int32_t)mtlOrigin.y;
	vkOffset.z = (int32_t)mtlOrigin.z;
	return vkOffset;
}

Expand Down
1 change: 1 addition & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject {
uint32_t getMoltenVKGitRevision();
void populateDeviceIDProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props);
void populateSubgroupProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props);
void populateHostImageCopyProperties(VkPhysicalDeviceHostImageCopyPropertiesEXT* pHostImageCopyProps);
void logGPUInfo();

id<MTLDevice> _mtlDevice;
Expand Down
121 changes: 89 additions & 32 deletions MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,11 @@
interlockFeatures->fragmentShaderShadingRateInterlock = false; // Requires variable rate shading; not supported yet in Metal
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT: {
auto* hostImageCopyFeatures = (VkPhysicalDeviceHostImageCopyFeaturesEXT*)next;
hostImageCopyFeatures->hostImageCopy = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
auto* pipelineCreationCacheControlFeatures = (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT*)next;
pipelineCreationCacheControlFeatures->pipelineCreationCacheControl = true;
Expand Down Expand Up @@ -817,6 +822,10 @@
extMemHostProps->minImportedHostPointerAlignment = _metalFeatures.hostMemoryPageSize;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT: {
populateHostImageCopyProperties((VkPhysicalDeviceHostImageCopyPropertiesEXT*)next);
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
// This isn't implemented yet, but when it is, it is expected that we'll wind up doing it manually.
auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next;
Expand Down Expand Up @@ -845,6 +854,77 @@
}
}

// Populates the VK_EXT_host_image_copy properties structure:
//   - the image layouts accepted as host-copy sources and destinations,
//   - the UUID identifying the optimal-tiling memory layout,
//   - whether host-transfer usage leaves memory type requirements unchanged.
// The layout arrays follow the Vulkan two-call idiom: when an array pointer is NULL, only the
// count is returned; otherwise at most the caller-supplied count of entries is written, and the
// count is overwritten with the number of entries actually written.
void MVKPhysicalDevice::populateHostImageCopyProperties(VkPhysicalDeviceHostImageCopyPropertiesEXT* pHostImageCopyProps) {

	// Metal lacks the concept of image layouts, and so does not restrict
	// host copy transfers based on them. Assume all image layouts are supported.
	// TODO: As extensions that add layouts are implemented, this list should be extended.
	VkImageLayout supportedImgLayouts[] = {
		VK_IMAGE_LAYOUT_UNDEFINED,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_IMAGE_LAYOUT_PREINITIALIZED,
		VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
		VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
		VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
	};
	uint32_t supportedImgLayoutsCnt = sizeof(supportedImgLayouts) / sizeof(VkImageLayout);

	// pCopySrcLayouts
	// If pCopySrcLayouts is NULL, return the number of supported layouts. Otherwise, write as
	// many layouts as fit in the caller's array, and report back how many were actually written,
	// so the caller never reads entries beyond those populated here.
	if (pHostImageCopyProps->pCopySrcLayouts) {
		pHostImageCopyProps->copySrcLayoutCount = min(pHostImageCopyProps->copySrcLayoutCount, supportedImgLayoutsCnt);
		mvkCopy(pHostImageCopyProps->pCopySrcLayouts, supportedImgLayouts, pHostImageCopyProps->copySrcLayoutCount);
	} else {
		pHostImageCopyProps->copySrcLayoutCount = supportedImgLayoutsCnt;
	}

	// pCopyDstLayouts
	// Same contract as pCopySrcLayouts above: count-only query when the array is NULL,
	// otherwise a bounded copy followed by overwriting the count with the number written.
	if (pHostImageCopyProps->pCopyDstLayouts) {
		pHostImageCopyProps->copyDstLayoutCount = min(pHostImageCopyProps->copyDstLayoutCount, supportedImgLayoutsCnt);
		mvkCopy(pHostImageCopyProps->pCopyDstLayouts, supportedImgLayouts, pHostImageCopyProps->copyDstLayoutCount);
	} else {
		pHostImageCopyProps->copyDstLayoutCount = supportedImgLayoutsCnt;
	}

	// optimalTilingLayoutUUID
	// Since optimalTilingLayoutUUID is a uint8_t array, use Big-Endian byte ordering
	// for the integer IDs, so a hex dump of the array is human readable in its parts.
	// Guard against the components written below overrunning the UUID array.
	static_assert((2 * sizeof(uint32_t)) + sizeof(MVKOSVersion) <= VK_UUID_SIZE,
				  "optimalTilingLayoutUUID components must fit within VK_UUID_SIZE bytes");
	uint8_t* uuid = pHostImageCopyProps->optimalTilingLayoutUUID;
	size_t uuidComponentOffset = 0;
	mvkClear(uuid, VK_UUID_SIZE);

	// First 4 bytes contain the GPU vendor ID.
	// Use Big-Endian byte ordering, so a hex dump is human readable.
	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(_properties.vendorID);
	uuidComponentOffset += sizeof(uint32_t);

	// Next 4 bytes contain the GPU device ID.
	// Use Big-Endian byte ordering, so a hex dump is human readable.
	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(_properties.deviceID);
	uuidComponentOffset += sizeof(uint32_t);

	// Next bytes contain the OS version, stored in host representation (not byte-swapped).
	*(MVKOSVersion*)&uuid[uuidComponentOffset] = mvkOSVersion();
	uuidComponentOffset += sizeof(MVKOSVersion);

	// Any remaining bytes are left zero by the mvkClear() above.

	// identicalMemoryTypeRequirements
	// Metal cannot use Private storage mode with host memory access.
	pHostImageCopyProps->identicalMemoryTypeRequirements = false;
}

// Since these are uint8_t arrays, use Big-Endian byte ordering,
// so a hex dump of the array is human readable in its parts.
void MVKPhysicalDevice::populateDeviceIDProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props) {
Expand Down Expand Up @@ -1177,6 +1257,15 @@
samplerYcbcrConvProps->combinedImageSamplerDescriptorCount = std::max(_pixelFormats.getChromaSubsamplingPlaneCount(pImageFormatInfo->format), (uint8_t)1u);
break;
}
case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: {
// Under Metal, VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT does not affect either memory layout
// or access, therefore, both identicalMemoryLayout and optimalDeviceAccess should be VK_TRUE.
// Also, per Vulkan spec, if identicalMemoryLayout is VK_TRUE, optimalDeviceAccess must also be VK_TRUE.
auto* hostImgCopyPerfQry = (VkHostImageCopyDevicePerformanceQueryEXT*)nextProps;
hostImgCopyPerfQry->optimalDeviceAccess = VK_TRUE;
hostImgCopyPerfQry->identicalMemoryLayout = VK_TRUE;
break;
}
default:
break;
}
Expand Down Expand Up @@ -3068,38 +3157,6 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope
_memoryProperties.memoryTypes[typeIndex].propertyFlags = propertyFlags;
}

// Initializes the memory properties of this instance.
// Metal Shared:
// - applies to both buffers and textures
// - default mode for buffers on both iOS & macOS
// - default mode for textures on iOS
// - one copy of memory visible to both CPU & GPU
// - coherent at command buffer boundaries
// Metal Private:
// - applies to both buffers and textures
// - accessed only by GPU through render, compute, or BLIT operations
// - no access by CPU
// - always use for framebuffers and renderable textures
// Metal Managed:
// - applies to both buffers and textures
// - default mode for textures on macOS
// - two copies of each buffer or texture when discrete memory available
// - convenience of shared mode, performance of private mode
// - on unified systems behaves like shared memory and has only one copy of content
// - when writing, use:
// - buffer didModifyRange:
// - texture replaceRegion:
// - when reading, use:
// - encoder synchronizeResource: followed by
// - cmdbuff waitUntilCompleted (or completion handler)
// - buffer/texture getBytes:
// Metal Memoryless:
// - applies only to textures used as transient render targets
// - only available with TBDR devices (i.e. on iOS)
// - no device memory is reserved at all
// - storage comes from tile memory
// - contents are undefined after rendering
// - use for temporary renderable textures
void MVKPhysicalDevice::initMemoryProperties() {

mvkClear(&_memoryProperties); // Start with everything cleared
Expand Down
1 change: 1 addition & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE,
MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3)
MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, EXT, 31)
MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3)
MVK_DEVICE_FEATURE_EXTN(HostImageCopy, HOST_IMAGE_COPY, EXT, 1)
MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1)
MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3)
MVK_DEVICE_FEATURE_EXTN(ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, EXT, 12)
Expand Down
4 changes: 2 additions & 2 deletions MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ class MVKDeviceMemory : public MVKVulkanAPIDeviceObject {
inline VkDeviceSize getDeviceMemoryCommitment() { return _allocationSize; }

/**
* Returns the host memory address of this memory, or NULL if the memory
* is marked as device-only and cannot be mapped to a host address.
* Returns the host memory address of this memory, or NULL if the memory has not been
* mapped yet, or is marked as device-only and cannot be mapped to a host address.
*/
inline void* getHostMemoryAddress() { return _pMemory; }

Expand Down
17 changes: 17 additions & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKImage.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ class MVKImage : public MVKVulkanAPIDeviceObject {
VkResult getSubresourceLayout(const VkImageSubresource* pSubresource,
VkSubresourceLayout* pLayout);

/** Populates the specified layout for the specified sub-resource. */
VkResult getSubresourceLayout(const VkImageSubresource2KHR* pSubresource,
VkSubresourceLayout2KHR* pLayout);

/** Populates the specified transfer image descriptor data structure. */
void getTransferDescriptorData(MVKImageDescriptorData& imgData);

Expand All @@ -252,6 +256,13 @@ class MVKImage : public MVKVulkanAPIDeviceObject {
/** Flush underlying buffer memory into the image if necessary */
void flushToDevice(VkDeviceSize offset, VkDeviceSize size);

/** Host-copy the content of this image to or from memory using the CPU. */
template<typename CopyInfo> VkResult copyContent(const CopyInfo* pCopyInfo);

/** Host-copy the content of one image to another using the CPU. */
static VkResult copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo);


#pragma mark Metal

/** Returns the Metal texture underlying this image. */
Expand Down Expand Up @@ -348,6 +359,12 @@ class MVKImage : public MVKVulkanAPIDeviceObject {
uint8_t getMemoryBindingCount() const { return (uint8_t)_memoryBindings.size(); }
uint8_t getMemoryBindingIndex(uint8_t planeIndex) const;
MVKImageMemoryBinding* getMemoryBinding(uint8_t planeIndex);
// Memory-to-image overload of the private host-copy helper.
// NOTE(review): presumably copies host bytes at pImgBytes into the imgRgn region of mtlTex
// at the given mipLevel and slice, using rowPitch and depthPitch as the host memory strides.
// This is inferred from the parameter names and types only — confirm against the definition.
VkResult copyContent(id<MTLTexture> mtlTex,
VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice,
void* pImgBytes, size_t rowPitch, size_t depthPitch);
// Image-to-memory overload of the private host-copy helper.
// NOTE(review): presumably copies the imgRgn region of mtlTex at the given mipLevel and slice
// out to host bytes at pImgBytes, using rowPitch and depthPitch as the host memory strides.
// This is inferred from the parameter names and types only — confirm against the definition.
VkResult copyContent(id<MTLTexture> mtlTex,
VkImageToMemoryCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice,
void* pImgBytes, size_t rowPitch, size_t depthPitch);

MVKSmallVector<MVKImageMemoryBinding*, 3> _memoryBindings;
MVKSmallVector<MVKImagePlane*, 3> _planes;
Expand Down
Loading

0 comments on commit 2290e86

Please sign in to comment.