diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 52d5fc86..e75b2e41 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,6 +49,7 @@ add_library(OpenCL-objects OBJECT device.cpp device_properties.cpp event.cpp + image_format.cpp init.cpp kernel.cpp log.cpp diff --git a/src/api.cpp b/src/api.cpp index 6ab4c9c9..5a2375ff 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -23,6 +23,7 @@ #include "queue.hpp" #include "semaphore.hpp" #include "tracing.hpp" +#include "image_format.hpp" #define LOG_API_CALL(fmt, ...) \ cvk_debug_group_fn(loggroup::api, fmt, __VA_ARGS__) @@ -4590,195 +4591,47 @@ cl_int CLVK_API_CALL clGetImageInfo(cl_mem image, cl_image_info param_name, return ret; } -struct ClFormatMapHash { - size_t operator()(const cl_image_format& format) const { - return format.image_channel_order << 16 | - format.image_channel_data_type; - } -}; - -struct ClFormatMapEqual { - bool operator()(const cl_image_format& lhs, - const cl_image_format& rhs) const { - return lhs.image_channel_order == rhs.image_channel_order && - lhs.image_channel_data_type == rhs.image_channel_data_type; - } -}; - -struct image_format_support { - static constexpr cl_mem_flags RO = CL_MEM_READ_ONLY; - static constexpr cl_mem_flags WO = CL_MEM_WRITE_ONLY; - static constexpr cl_mem_flags RW = CL_MEM_KERNEL_READ_AND_WRITE; - static constexpr cl_mem_flags ROWO = RO | WO | CL_MEM_READ_WRITE; - static constexpr cl_mem_flags ALL = ROWO | RW; - - image_format_support(cl_mem_flags flags, VkFormat fmt) - : flags(flags), vkfmt(fmt) {} - image_format_support(VkFormat fmt) : flags(ALL), vkfmt(fmt) {} - - cl_mem_flags flags; - VkFormat vkfmt; -}; - -std::unordered_map - gFormatMaps = { - // R formats - {{CL_R, CL_UNORM_INT8}, VK_FORMAT_R8_UNORM}, - {{CL_R, CL_SNORM_INT8}, VK_FORMAT_R8_SNORM}, - {{CL_R, CL_UNSIGNED_INT8}, VK_FORMAT_R8_UINT}, - {{CL_R, CL_SIGNED_INT8}, VK_FORMAT_R8_SINT}, - {{CL_R, CL_UNORM_INT16}, VK_FORMAT_R16_UNORM}, - {{CL_R, CL_SNORM_INT16}, VK_FORMAT_R16_SNORM}, - {{CL_R, CL_UNSIGNED_INT16}, VK_FORMAT_R16_UINT}, - {{CL_R, CL_SIGNED_INT16}, VK_FORMAT_R16_SINT}, - {{CL_R, CL_HALF_FLOAT}, VK_FORMAT_R16_SFLOAT}, - {{CL_R, CL_UNSIGNED_INT32}, VK_FORMAT_R32_UINT}, - {{CL_R, CL_SIGNED_INT32}, VK_FORMAT_R32_SINT}, - {{CL_R, CL_FLOAT}, VK_FORMAT_R32_SFLOAT}, - - // LUMINANCE formats - {{CL_LUMINANCE, CL_UNORM_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_UNORM}}, - {{CL_LUMINANCE, CL_SNORM_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_SNORM}}, - {{CL_LUMINANCE, CL_UNSIGNED_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_UINT}}, - {{CL_LUMINANCE, CL_SIGNED_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_SINT}}, - {{CL_LUMINANCE, CL_UNORM_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_UNORM}}, - {{CL_LUMINANCE, CL_SNORM_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_SNORM}}, - {{CL_LUMINANCE, CL_UNSIGNED_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_UINT}}, - {{CL_LUMINANCE, CL_SIGNED_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_SINT}}, - {{CL_LUMINANCE, CL_HALF_FLOAT}, - {image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}}, - {{CL_LUMINANCE, CL_UNSIGNED_INT32}, - {image_format_support::ROWO, VK_FORMAT_R32_UINT}}, - {{CL_LUMINANCE, CL_SIGNED_INT32}, - {image_format_support::ROWO, VK_FORMAT_R32_SINT}}, - {{CL_LUMINANCE, CL_FLOAT}, - {image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}}, - - // INTENSITY formats - {{CL_INTENSITY, CL_UNORM_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_UNORM}}, - {{CL_INTENSITY, CL_SNORM_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_SNORM}}, - {{CL_INTENSITY, CL_UNSIGNED_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_UINT}}, - {{CL_INTENSITY, CL_SIGNED_INT8}, - {image_format_support::ROWO, VK_FORMAT_R8_SINT}}, - {{CL_INTENSITY, CL_UNORM_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_UNORM}}, - {{CL_INTENSITY, CL_SNORM_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_SNORM}}, - {{CL_INTENSITY, CL_UNSIGNED_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_UINT}}, - {{CL_INTENSITY, CL_SIGNED_INT16}, - {image_format_support::ROWO, VK_FORMAT_R16_SINT}}, - {{CL_INTENSITY, CL_HALF_FLOAT}, - {image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}}, - {{CL_INTENSITY, CL_UNSIGNED_INT32}, - {image_format_support::ROWO, VK_FORMAT_R32_UINT}}, - {{CL_INTENSITY, CL_SIGNED_INT32}, - {image_format_support::ROWO, VK_FORMAT_R32_SINT}}, - {{CL_INTENSITY, CL_FLOAT}, - {image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}}, - - // RG formats - {{CL_RG, CL_UNORM_INT8}, VK_FORMAT_R8G8_UNORM}, - {{CL_RG, CL_SNORM_INT8}, VK_FORMAT_R8G8_SNORM}, - {{CL_RG, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8_UINT}, - {{CL_RG, CL_SIGNED_INT8}, VK_FORMAT_R8G8_SINT}, - {{CL_RG, CL_UNORM_INT16}, VK_FORMAT_R16G16_UNORM}, - {{CL_RG, CL_SNORM_INT16}, VK_FORMAT_R16G16_SNORM}, - {{CL_RG, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16_UINT}, - {{CL_RG, CL_SIGNED_INT16}, VK_FORMAT_R16G16_SINT}, - {{CL_RG, CL_HALF_FLOAT}, VK_FORMAT_R16G16_SFLOAT}, - {{CL_RG, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32_UINT}, - {{CL_RG, CL_SIGNED_INT32}, VK_FORMAT_R32G32_SINT}, - {{CL_RG, CL_FLOAT}, VK_FORMAT_R32G32_SFLOAT}, - - // RGB formats - {{CL_RGB, CL_UNORM_INT8}, VK_FORMAT_R8G8B8_UNORM}, - {{CL_RGB, CL_SNORM_INT8}, VK_FORMAT_R8G8B8_SNORM}, - {{CL_RGB, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8_UINT}, - {{CL_RGB, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8_SINT}, - {{CL_RGB, CL_UNORM_INT16}, VK_FORMAT_R16G16B16_UNORM}, - {{CL_RGB, CL_SNORM_INT16}, VK_FORMAT_R16G16B16_SNORM}, - {{CL_RGB, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16_UINT}, - {{CL_RGB, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16_SINT}, - {{CL_RGB, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16_SFLOAT}, - {{CL_RGB, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32_UINT}, - {{CL_RGB, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32_SINT}, - {{CL_RGB, CL_FLOAT}, VK_FORMAT_R32G32B32_SFLOAT}, - {{CL_RGB, CL_UNORM_SHORT_565}, VK_FORMAT_R5G6B5_UNORM_PACK16}, - - // RGBA formats - {{CL_RGBA, CL_UNORM_INT8}, VK_FORMAT_R8G8B8A8_UNORM}, - {{CL_RGBA, CL_SNORM_INT8}, VK_FORMAT_R8G8B8A8_SNORM}, - {{CL_RGBA, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8A8_UINT}, - {{CL_RGBA, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8A8_SINT}, - {{CL_RGBA, CL_UNORM_INT16}, VK_FORMAT_R16G16B16A16_UNORM}, - {{CL_RGBA, CL_SNORM_INT16}, VK_FORMAT_R16G16B16A16_SNORM}, - {{CL_RGBA, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16A16_UINT}, - {{CL_RGBA, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16A16_SINT}, - {{CL_RGBA, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16A16_SFLOAT}, - {{CL_RGBA, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32A32_UINT}, - {{CL_RGBA, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32A32_SINT}, - {{CL_RGBA, CL_FLOAT}, VK_FORMAT_R32G32B32A32_SFLOAT}, - - // BGRA formats - {{CL_BGRA, CL_UNORM_INT8}, VK_FORMAT_B8G8R8A8_UNORM}, - {{CL_BGRA, CL_SNORM_INT8}, VK_FORMAT_B8G8R8A8_SNORM}, - {{CL_BGRA, CL_UNSIGNED_INT8}, VK_FORMAT_B8G8R8A8_UINT}, - {{CL_BGRA, CL_SIGNED_INT8}, VK_FORMAT_B8G8R8A8_SINT}, -}; - -static void get_component_mappings_for_channel_order( - cl_channel_order order, VkComponentMapping* components_sampled, - VkComponentMapping* components_storage) { - if (order == CL_LUMINANCE) { - *components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, - VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A}; - } else if (order == CL_INTENSITY) { - *components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, - VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R}; - } else { - *components_sampled = { - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - } - - *components_storage = { - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; +bool operator!=(const VkComponentMapping& lhs, const VkComponentMapping& rhs) { + return lhs.r != rhs.r || lhs.g != rhs.g || lhs.b != rhs.b || lhs.a != rhs.a; } -bool cl_image_format_to_vulkan_format(const cl_image_format& clformat, - VkFormat* format, - VkComponentMapping* components_sampled, - VkComponentMapping* components_storage) { - auto m = gFormatMaps.find(clformat); - bool success = false; - - if (m != gFormatMaps.end()) { - *format = (*m).second.vkfmt; - success = true; +static bool is_image_format_supported( + VkPhysicalDevice& pdev, VkFormat format, cl_mem_object_type image_type, + const VkFormatFeatureFlags& required_format_feature_flags, + cl_channel_order image_channel_order) { + VkFormatProperties properties; + vkGetPhysicalDeviceFormatProperties(pdev, format, &properties); + + cvk_debug("Vulkan format %d:", format); + cvk_debug( + " linear : %s", + vulkan_format_features_string(properties.linearTilingFeatures).c_str()); + cvk_debug(" optimal: %s", + vulkan_format_features_string(properties.optimalTilingFeatures) + .c_str()); + cvk_debug(" buffer : %s", + vulkan_format_features_string(properties.bufferFeatures).c_str()); + + cvk_debug( + "Required format features %s", + vulkan_format_features_string(required_format_feature_flags).c_str()); + VkFormatFeatureFlags features; + if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + features = properties.bufferFeatures; + } else { + // TODO support linear tiling + features = properties.optimalTilingFeatures; + } + if ((features & required_format_feature_flags) == + required_format_feature_flags) { + if ((image_channel_order == CL_LUMINANCE || + image_channel_order == CL_INTENSITY) && + (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { + return false; + } + return true; } - - get_component_mappings_for_channel_order( - clformat.image_channel_order, components_sampled, components_storage); - - return success; -} - -bool operator!=(const VkComponentMapping& lhs, const VkComponentMapping& rhs) { - return lhs.r != rhs.r || lhs.g != rhs.g || lhs.b != rhs.b || lhs.a != rhs.a; + return false; } cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, @@ -4809,7 +4662,8 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, cl_uint num_formats_found = 0; - auto pdev = icd_downcast(context)->device()->vulkan_physical_device(); + auto dev = icd_downcast(context)->device(); + auto pdev = dev->vulkan_physical_device(); const VkFormatFeatureFlags required_format_feature_flags = cvk_image::required_format_feature_flags_for(image_type, flags); @@ -4820,56 +4674,49 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, // Iterate over all known CL/VK format associations and report the CL // formats for which the Vulkan format is supported - for (auto const& clvkfmt : gFormatMaps) { - const cl_image_format& clfmt = clvkfmt.first; - auto const& fmt_support = clvkfmt.second; - if ((fmt_support.flags & flags) != flags) { - continue; - } - VkFormat format = fmt_support.vkfmt; - VkFormatProperties properties; - vkGetPhysicalDeviceFormatProperties(pdev, format, &properties); - - cvk_debug("Vulkan format %d:", format); - cvk_debug(" linear : %s", - vulkan_format_features_string(properties.linearTilingFeatures) - .c_str()); - cvk_debug(" optimal: %s", vulkan_format_features_string( - properties.optimalTilingFeatures) - .c_str()); - cvk_debug( - " buffer : %s", - vulkan_format_features_string(properties.bufferFeatures).c_str()); - - cvk_debug("Required format features %s", - vulkan_format_features_string(required_format_feature_flags) - .c_str()); - VkFormatFeatureFlags features; - if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { - features = properties.bufferFeatures; - } else { - // TODO support linear tiling - features = properties.optimalTilingFeatures; - } - if ((features & required_format_feature_flags) == - required_format_feature_flags) { + const cl_channel_order channel_orders[] = { + CL_R, CL_A, CL_RG, CL_RA, CL_RGB, + CL_RGBA, CL_BGRA, CL_ARGB, CL_INTENSITY, CL_LUMINANCE, + CL_Rx, CL_RGx, CL_RGBx, CL_DEPTH, CL_DEPTH_STENCIL, + CL_sRGB, CL_sRGBx, CL_sRGBA, CL_sBGRA, CL_ABGR, + }; + const cl_channel_type channel_data_types[] = { + CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, + CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, + CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, + CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, + CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, + CL_UNORM_INT24, CL_UNORM_INT_101010_2, + }; + for (auto const image_channel_order : channel_orders) { + for (auto const image_channel_data_type : channel_data_types) { VkComponentMapping components_sampled, components_storage; - get_component_mappings_for_channel_order(clfmt.image_channel_order, - &components_sampled, - &components_storage); - if ((components_sampled != components_storage) && - (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { + image_format_support fmt_support; + cl_image_format clfmt = { + .image_channel_order = image_channel_order, + .image_channel_data_type = image_channel_data_type, + }; + if (!cl_image_format_to_vulkan_format( + clfmt, image_type, dev, &fmt_support, &components_sampled, + &components_storage)) { continue; } + if ((fmt_support.flags & flags) != flags) { + continue; + } + if (!is_image_format_supported(pdev, fmt_support.vkfmt, image_type, + required_format_feature_flags, image_channel_order)) { + continue; + } + + // image format is supported if ((image_formats != nullptr) && (num_formats_found < num_entries)) { image_formats[num_formats_found] = clfmt; cvk_debug_fn( "reporting image format {%s, %s}", - cl_channel_order_to_string(clfmt.image_channel_order) - .c_str(), - cl_channel_type_to_string(clfmt.image_channel_data_type) - .c_str()); + cl_channel_order_to_string(image_channel_order).c_str(), + cl_channel_type_to_string(image_channel_data_type).c_str()); } num_formats_found++; } diff --git a/src/device.hpp b/src/device.hpp index 91ed7bc3..1ca3d3da 100644 --- a/src/device.hpp +++ b/src/device.hpp @@ -548,6 +548,11 @@ struct cvk_device : public _cl_device_id, const cvk_vulkan_extension_functions& vkfns() const { return m_vkfns; } + bool is_bgra_format_not_supported_for_image1d_buffer() const { + return m_clvk_properties + ->is_bgra_format_not_supported_for_image1d_buffer(); + } + private: std::string version_desc() const { std::string ret = "CLVK on Vulkan v"; diff --git a/src/device_properties.cpp b/src/device_properties.cpp index 9d490f98..8a1a0db7 100644 --- a/src/device_properties.cpp +++ b/src/device_properties.cpp @@ -105,7 +105,11 @@ struct cvk_device_properties_intel : public cvk_device_properties { }); } std::string get_compile_options() const override final { - return "-hack-mul-extended -hack-convert-to-float"; + return "-hack-mul-extended -hack-convert-to-float -hack-image1d-buffer-bgra"; + } + bool + is_bgra_format_not_supported_for_image1d_buffer() const override final { + return true; } }; diff --git a/src/device_properties.hpp b/src/device_properties.hpp index fe4794b8..e92c37a3 100644 --- a/src/device_properties.hpp +++ b/src/device_properties.hpp @@ -52,6 +52,10 @@ struct cvk_device_properties { virtual bool is_non_uniform_decoration_broken() const { return false; } + virtual bool is_bgra_format_not_supported_for_image1d_buffer() const { + return false; + } + virtual ~cvk_device_properties() {} }; diff --git a/src/image_format.cpp b/src/image_format.cpp new file mode 100644 index 00000000..c9605219 --- /dev/null +++ b/src/image_format.cpp @@ -0,0 +1,202 @@ +// Copyright 2023 The clvk authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "image_format.hpp" +#include "CL/cl.h" +#include + +std::unordered_map + FormatMaps = { + // R formats + {{CL_R, CL_UNORM_INT8}, VK_FORMAT_R8_UNORM}, + {{CL_R, CL_SNORM_INT8}, VK_FORMAT_R8_SNORM}, + {{CL_R, CL_UNSIGNED_INT8}, VK_FORMAT_R8_UINT}, + {{CL_R, CL_SIGNED_INT8}, VK_FORMAT_R8_SINT}, + {{CL_R, CL_UNORM_INT16}, VK_FORMAT_R16_UNORM}, + {{CL_R, CL_SNORM_INT16}, VK_FORMAT_R16_SNORM}, + {{CL_R, CL_UNSIGNED_INT16}, VK_FORMAT_R16_UINT}, + {{CL_R, CL_SIGNED_INT16}, VK_FORMAT_R16_SINT}, + {{CL_R, CL_HALF_FLOAT}, VK_FORMAT_R16_SFLOAT}, + {{CL_R, CL_UNSIGNED_INT32}, VK_FORMAT_R32_UINT}, + {{CL_R, CL_SIGNED_INT32}, VK_FORMAT_R32_SINT}, + {{CL_R, CL_FLOAT}, VK_FORMAT_R32_SFLOAT}, + + // LUMINANCE formats + {{CL_LUMINANCE, CL_UNORM_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_UNORM}}, + {{CL_LUMINANCE, CL_SNORM_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_SNORM}}, + {{CL_LUMINANCE, CL_UNSIGNED_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_UINT}}, + {{CL_LUMINANCE, CL_SIGNED_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_SINT}}, + {{CL_LUMINANCE, CL_UNORM_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_UNORM}}, + {{CL_LUMINANCE, CL_SNORM_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_SNORM}}, + {{CL_LUMINANCE, CL_UNSIGNED_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_UINT}}, + {{CL_LUMINANCE, CL_SIGNED_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_SINT}}, + {{CL_LUMINANCE, CL_HALF_FLOAT}, + {image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}}, + {{CL_LUMINANCE, CL_UNSIGNED_INT32}, + {image_format_support::ROWO, VK_FORMAT_R32_UINT}}, + {{CL_LUMINANCE, CL_SIGNED_INT32}, + {image_format_support::ROWO, VK_FORMAT_R32_SINT}}, + {{CL_LUMINANCE, CL_FLOAT}, + {image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}}, + + // INTENSITY formats + {{CL_INTENSITY, CL_UNORM_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_UNORM}}, + {{CL_INTENSITY, CL_SNORM_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_SNORM}}, + {{CL_INTENSITY, CL_UNSIGNED_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_UINT}}, + {{CL_INTENSITY, CL_SIGNED_INT8}, + {image_format_support::ROWO, VK_FORMAT_R8_SINT}}, + {{CL_INTENSITY, CL_UNORM_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_UNORM}}, + {{CL_INTENSITY, CL_SNORM_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_SNORM}}, + {{CL_INTENSITY, CL_UNSIGNED_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_UINT}}, + {{CL_INTENSITY, CL_SIGNED_INT16}, + {image_format_support::ROWO, VK_FORMAT_R16_SINT}}, + {{CL_INTENSITY, CL_HALF_FLOAT}, + {image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}}, + {{CL_INTENSITY, CL_UNSIGNED_INT32}, + {image_format_support::ROWO, VK_FORMAT_R32_UINT}}, + {{CL_INTENSITY, CL_SIGNED_INT32}, + {image_format_support::ROWO, VK_FORMAT_R32_SINT}}, + {{CL_INTENSITY, CL_FLOAT}, + {image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}}, + + // RG formats + {{CL_RG, CL_UNORM_INT8}, VK_FORMAT_R8G8_UNORM}, + {{CL_RG, CL_SNORM_INT8}, VK_FORMAT_R8G8_SNORM}, + {{CL_RG, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8_UINT}, + {{CL_RG, CL_SIGNED_INT8}, VK_FORMAT_R8G8_SINT}, + {{CL_RG, CL_UNORM_INT16}, VK_FORMAT_R16G16_UNORM}, + {{CL_RG, CL_SNORM_INT16}, VK_FORMAT_R16G16_SNORM}, + {{CL_RG, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16_UINT}, + {{CL_RG, CL_SIGNED_INT16}, VK_FORMAT_R16G16_SINT}, + {{CL_RG, CL_HALF_FLOAT}, VK_FORMAT_R16G16_SFLOAT}, + {{CL_RG, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32_UINT}, + {{CL_RG, CL_SIGNED_INT32}, VK_FORMAT_R32G32_SINT}, + {{CL_RG, CL_FLOAT}, VK_FORMAT_R32G32_SFLOAT}, + + // RGB formats + {{CL_RGB, CL_UNORM_INT8}, VK_FORMAT_R8G8B8_UNORM}, + {{CL_RGB, CL_SNORM_INT8}, VK_FORMAT_R8G8B8_SNORM}, + {{CL_RGB, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8_UINT}, + {{CL_RGB, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8_SINT}, + {{CL_RGB, CL_UNORM_INT16}, VK_FORMAT_R16G16B16_UNORM}, + {{CL_RGB, CL_SNORM_INT16}, VK_FORMAT_R16G16B16_SNORM}, + {{CL_RGB, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16_UINT}, + {{CL_RGB, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16_SINT}, + {{CL_RGB, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16_SFLOAT}, + {{CL_RGB, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32_UINT}, + {{CL_RGB, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32_SINT}, + {{CL_RGB, CL_FLOAT}, VK_FORMAT_R32G32B32_SFLOAT}, + {{CL_RGB, CL_UNORM_SHORT_565}, VK_FORMAT_R5G6B5_UNORM_PACK16}, + + // RGBA formats + {{CL_RGBA, CL_UNORM_INT8}, VK_FORMAT_R8G8B8A8_UNORM}, + {{CL_RGBA, CL_SNORM_INT8}, VK_FORMAT_R8G8B8A8_SNORM}, + {{CL_RGBA, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8A8_UINT}, + {{CL_RGBA, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8A8_SINT}, + {{CL_RGBA, CL_UNORM_INT16}, VK_FORMAT_R16G16B16A16_UNORM}, + {{CL_RGBA, CL_SNORM_INT16}, VK_FORMAT_R16G16B16A16_SNORM}, + {{CL_RGBA, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16A16_UINT}, + {{CL_RGBA, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16A16_SINT}, + {{CL_RGBA, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16A16_SFLOAT}, + {{CL_RGBA, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32A32_UINT}, + {{CL_RGBA, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32A32_SINT}, + {{CL_RGBA, CL_FLOAT}, VK_FORMAT_R32G32B32A32_SFLOAT}, + + // BGRA formats + {{CL_BGRA, CL_UNORM_INT8}, VK_FORMAT_B8G8R8A8_UNORM}, + {{CL_BGRA, CL_SNORM_INT8}, VK_FORMAT_B8G8R8A8_SNORM}, + {{CL_BGRA, CL_UNSIGNED_INT8}, VK_FORMAT_B8G8R8A8_UINT}, + {{CL_BGRA, CL_SIGNED_INT8}, VK_FORMAT_B8G8R8A8_SINT}, +}; + +static bool get_equivalent_bgra_format_for_image1d_buffer( + VkFormat& fmt, VkComponentMapping* components_sampled, + VkComponentMapping* components_storage) { + const std::unordered_map map = { + {VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM}, + }; + const VkComponentMapping BGRA_mapping = { + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_IDENTITY}; + + auto it = map.find(fmt); + if (it != map.end()) { + fmt = it->second; + *components_sampled = *components_storage = BGRA_mapping; + return true; + } + return false; +} + +static void get_component_mappings_for_channel_order( + cl_channel_order order, VkComponentMapping* components_sampled, + VkComponentMapping* components_storage) { + if (order == CL_LUMINANCE) { + *components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A}; + } else if (order == CL_INTENSITY) { + *components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R}; + } else { + *components_sampled = { + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + } + + *components_storage = { + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; +} + +bool cl_image_format_to_vulkan_format(cl_image_format clformat, + cl_mem_object_type image_type, + cvk_device* device, + image_format_support* fmt_support, + VkComponentMapping* components_sampled, + VkComponentMapping* components_storage) { + auto m = FormatMaps.find(clformat); + bool success = false; + + if (m != FormatMaps.end()) { + *fmt_support = (*m).second; + success = true; + } + + get_component_mappings_for_channel_order( + clformat.image_channel_order, components_sampled, components_storage); + + if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER && + clformat.image_channel_order == CL_BGRA && + device->is_bgra_format_not_supported_for_image1d_buffer()) { + get_equivalent_bgra_format_for_image1d_buffer( + fmt_support->vkfmt, components_sampled, components_storage); + } + + return success; +} diff --git a/src/image_format.hpp b/src/image_format.hpp new file mode 100644 index 00000000..f26c8ef0 --- /dev/null +++ b/src/image_format.hpp @@ -0,0 +1,58 @@ +// Copyright 2018 The clvk authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "cl_headers.hpp" +#include "device.hpp" + +#include + +struct ClFormatMapHash { + size_t operator()(const cl_image_format& format) const { + return format.image_channel_order << 16 | + format.image_channel_data_type; + } +}; + +struct ClFormatMapEqual { + bool operator()(const cl_image_format& lhs, + const cl_image_format& rhs) const { + return lhs.image_channel_order == rhs.image_channel_order && + lhs.image_channel_data_type == rhs.image_channel_data_type; + } +}; + +struct image_format_support { + static constexpr cl_mem_flags RO = CL_MEM_READ_ONLY; + static constexpr cl_mem_flags WO = CL_MEM_WRITE_ONLY; + static constexpr cl_mem_flags RW = CL_MEM_KERNEL_READ_AND_WRITE; + static constexpr cl_mem_flags ROWO = RO | WO | CL_MEM_READ_WRITE; + static constexpr cl_mem_flags ALL = ROWO | RW; + + image_format_support(cl_mem_flags flags, VkFormat fmt) + : flags(flags), vkfmt(fmt) {} + image_format_support(VkFormat fmt) : flags(ALL), vkfmt(fmt) {} + image_format_support() {} + + cl_mem_flags flags; + VkFormat vkfmt; +}; + +bool cl_image_format_to_vulkan_format(cl_image_format clformat, + cl_mem_object_type image_type, + cvk_device* device, + image_format_support* fmt_support, + VkComponentMapping* components_sampled, + VkComponentMapping* components_storage); diff --git a/src/memory.cpp b/src/memory.cpp index d79ff84e..a655c642 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -14,6 +14,7 @@ #include +#include "image_format.hpp" #include "memory.hpp" #include "queue.hpp" @@ -299,12 +300,6 @@ cvk_image* cvk_image::create(cvk_context* ctx, cl_mem_flags flags, return image.release(); } -extern bool -cl_image_format_to_vulkan_format(const cl_image_format& clfmt, - VkFormat* format, - VkComponentMapping* components_sampled, - VkComponentMapping* components_storage); - bool cvk_image::init_vulkan_image() { // Translate image type and size VkImageType image_type; @@ -372,11 +367,14 @@ bool cvk_image::init_vulkan_image() { } // Translate format - VkFormat format; + image_format_support fmt; VkComponentMapping components_sampled, components_storage; + auto device = m_context->device(); + auto success = cl_image_format_to_vulkan_format( - m_format, &format, &components_sampled, &components_storage); + m_format, m_desc.image_type, device, &fmt, &components_sampled, + &components_storage); if (!success) { return false; // TODO error code } @@ -387,7 +385,7 @@ bool cvk_image::init_vulkan_image() { nullptr, // pNext 0, // flags image_type, // imageType - format, // format + fmt.vkfmt, // format extent, // extent 1, // mipLevels array_layers, // arrayLayers @@ -400,7 +398,6 @@ bool cvk_image::init_vulkan_image() { VK_IMAGE_LAYOUT_UNDEFINED, // initialLayout }; - auto device = m_context->device(); auto vkdev = device->vulkan_device(); auto res = vkCreateImage(vkdev, &imageCreateInfo, nullptr, &m_image); @@ -451,7 +448,7 @@ bool cvk_image::init_vulkan_image() { 0, // flags m_image, // image view_type, // viewType; - format, // format + fmt.vkfmt, // format components_sampled, // components subresource, // subresourceRange }; @@ -498,11 +495,12 @@ bool cvk_image::init_vulkan_texel_buffer() { auto device = m_context->device(); auto vkdev = device->vulkan_device(); - VkFormat format; + image_format_support fmt; VkComponentMapping components_sampled, components_storage; auto success = cl_image_format_to_vulkan_format( - m_format, &format, &components_sampled, &components_storage); + m_format, m_desc.image_type, device, &fmt, &components_sampled, + &components_storage); if (!success) { return false; } @@ -518,7 +516,7 @@ bool cvk_image::init_vulkan_texel_buffer() { nullptr, 0, // flags vkbuf, // buffer - format, // format + fmt.vkfmt, // format offset, // offset VK_WHOLE_SIZE // range };