From c849f7956a77df538f318da50237d4cb283c7313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Sun, 30 Oct 2022 16:36:48 +0000 Subject: [PATCH 1/6] WIP texel buffers Change-Id: I4eef91af393b5487bbc208bdc96d43daa4c5c462 --- src/api.cpp | 80 ++++++++++++++++++++++++++++++------------------ src/device.cpp | 1 + src/kernel.cpp | 36 +++++++++++++++++++++- src/memory.cpp | 81 ++++++++++++++++++++++++++++++++++++++++++------- src/memory.hpp | 45 ++++++++++++++++++++++----- src/program.cpp | 12 ++++++++ src/program.hpp | 6 +++- src/queue.hpp | 4 ++- 8 files changed, 214 insertions(+), 51 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index 91d201ad..9866f13e 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -710,7 +710,7 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev, size_ret = sizeof(val_uint); break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: - val_sizet = device->vulkan_limits().maxImageDimension1D; + val_sizet = device->vulkan_limits().maxTexelBufferElements; copy_ptr = &val_sizet; size_ret = sizeof(val_sizet); break; @@ -4718,22 +4718,13 @@ std::unordered_map descriptor_writes; std::vector buffer_info; std::vector image_info; + std::vector buffer_views; descriptor_writes.reserve(max_descriptor_writes); buffer_info.reserve(max_descriptor_writes); image_info.reserve(max_descriptor_writes); + buffer_views.reserve(max_descriptor_writes); // Setup module-scope variables if (program->module_constant_data_buffer() != nullptr && @@ -324,6 +328,36 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() { descriptor_writes.push_back(writeDescriptorSet); break; } + case kernel_argument_kind::storage_texel_buffer: + case kernel_argument_kind::uniform_texel_buffer: { + auto image = static_cast(get_arg_value(arg)); + bool uniform = + arg.kind == kernel_argument_kind::uniform_texel_buffer; + auto view = image->vulkan_buffer_view(); + buffer_views.push_back(view); + + cvk_debug_fn("buffer view %p @ set = %u, binding = %u", view, + arg.descriptorSet, arg.binding); + + VkDescriptorType dtype = + uniform ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + + VkWriteDescriptorSet writeDescriptorSet = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + ds[arg.descriptorSet], + arg.binding, // dstBinding + 0, // dstArrayElement + 1, // descriptorCount + dtype, + nullptr, // pImageInfo + nullptr, // pBufferInfo + &buffer_views.back(), // pTexelBufferView + }; + descriptor_writes.push_back(writeDescriptorSet); + break; + } case kernel_argument_kind::pod: // skip POD arguments case kernel_argument_kind::pod_ubo: case kernel_argument_kind::pod_pushconstant: diff --git a/src/memory.cpp b/src/memory.cpp index 5688e45b..f9017413 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -243,22 +243,33 @@ bool cvk_sampler::init() { VkFormatFeatureFlags cvk_image::required_format_feature_flags_for(cl_mem_object_type type, cl_mem_flags flags) { - UNUSED(type); // TODO will be required for 1D buffer images - // All images require TRANSFER_SRC, TRANSFER_DST + // 1Dbuffer requires + // RW / RaW: STORAGE_TEXEL_BUFFER + // RO: UNIFORM_TEXEL_BUFFER + // All other images require TRANSFER_SRC, TRANSFER_DST // read-only: SAMPLED_IMAGE, SAMPLED_IMAGE_FILTER_LINEAR // write-only: STORAGE_IMAGE // read-write: STORAGE_IMAGE, SAMPLED_IMAGE, SAMPLED_IMAGE_FILTER_LINEAR // read-and-write: STORAGE_IMAGE VkFormatFeatureFlags format_feature_flags = 0; - format_feature_flags = - VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - + if (type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { + format_feature_flags = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + } VkFormatFeatureFlags format_feature_flags_RO; - format_feature_flags_RO = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - + if (type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + format_feature_flags_RO = VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + } else { + format_feature_flags_RO = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } VkFormatFeatureFlags format_feature_flags_WO; - format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + if (type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + } else { + format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + } if (flags & (CL_MEM_KERNEL_READ_AND_WRITE | CL_MEM_WRITE_ONLY)) { format_feature_flags |= format_feature_flags_WO; @@ -292,7 +303,7 @@ cl_image_format_to_vulkan_format(const cl_image_format& clfmt, VkComponentMapping* components_sampled, VkComponentMapping* components_storage); -bool cvk_image::init() { +bool cvk_image::init_vulkan_image() { // Translate image type and size VkImageType image_type; VkImageViewType view_type; @@ -320,7 +331,6 @@ bool cvk_image::init() { size_t host_ptr_size = 0; switch (m_desc.image_type) { - case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D: image_type = VK_IMAGE_TYPE_1D; view_type = VK_IMAGE_VIEW_TYPE_1D; @@ -485,6 +495,55 @@ bool cvk_image::init() { return true; } +bool cvk_image::init_vulkan_texel_buffer() { + VkResult res; + + auto device = m_context->device(); + auto vkdev = device->vulkan_device(); + + VkFormat format; + VkComponentMapping components_sampled, components_storage; + + auto success = cl_image_format_to_vulkan_format( + m_format, &format, &components_sampled, &components_storage); + if (!success) { + return false; + } + + CVK_ASSERT(buffer()); + CVK_ASSERT(buffer()->is_buffer_type()); + + auto vkbuf = static_cast(buffer())->vulkan_buffer(); + + VkBufferViewCreateInfo createInfo = { + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + nullptr, + 0, // flags + vkbuf, // buffer + format, // format + 0, // offset + VK_WHOLE_SIZE // range + }; + + res = vkCreateBufferView(vkdev, &createInfo, nullptr, &m_buffer_view); + if (res != VK_SUCCESS) { + cvk_error_fn("Could not create buffer view"); + return false; + } + + buffer()->retain(); + + return true; +} + +bool cvk_image::init() { + if (is_backed_by_buffer_view()) { + return init_vulkan_texel_buffer(); + } else { + return init_vulkan_image(); + } +} + void cvk_image::prepare_fill_pattern(const void* input_pattern, fill_pattern_array& pattern, size_t* size_ret) const { diff --git a/src/memory.hpp b/src/memory.hpp index 63817558..0d71a6ad 100644 --- a/src/memory.hpp +++ b/src/memory.hpp @@ -317,7 +317,9 @@ struct cvk_buffer : public cvk_mem { VkBufferUsageFlags usage_flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; if (m_context->device()->uses_physical_addressing()) { usage_flags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; } @@ -485,10 +487,16 @@ struct cvk_image : public cvk_mem { /* FIXME parent_offset */ 0, std::move(properties), desc->image_type), m_desc(*desc), m_format(*format), m_image(VK_NULL_HANDLE), - m_sampled_view(VK_NULL_HANDLE), m_storage_view(VK_NULL_HANDLE) { + m_sampled_view(VK_NULL_HANDLE), m_storage_view(VK_NULL_HANDLE), + m_buffer_view(VK_NULL_HANDLE) { // All images require asynchronous initialiation for the initial - // layout transition (and copy/use host ptr init) - m_init_tracker.set_state(cvk_mem_init_state::required); + // layout transition (and copy/use host ptr init) apart from + // those backed by a texel buffer + if (is_backed_by_buffer_view()) { + m_init_tracker.set_state(cvk_mem_init_state::completed); + } else { + m_init_tracker.set_state(cvk_mem_init_state::required); + } } ~cvk_image() { @@ -502,6 +510,9 @@ struct cvk_image : public cvk_mem { if (m_storage_view != VK_NULL_HANDLE) { vkDestroyImageView(vkdev, m_storage_view, nullptr); } + if (m_buffer_view != VK_NULL_HANDLE) { + vkDestroyBufferView(vkdev, m_buffer_view, nullptr); + } if (buffer() != nullptr) { buffer()->release(); } @@ -530,9 +541,26 @@ struct cvk_image : public cvk_mem { const cl_image_format* format, void* host_ptr, std::vector&& properties); - VkImage vulkan_image() const { return m_image; } - VkImageView vulkan_sampled_view() const { return m_sampled_view; } - VkImageView vulkan_storage_view() const { return m_storage_view; } + bool is_backed_by_buffer_view() const { + return type() == CL_MEM_OBJECT_IMAGE1D_BUFFER; + } + + VkImage vulkan_image() const { + CVK_ASSERT(!is_backed_by_buffer_view()); + return m_image; + } + VkImageView vulkan_sampled_view() const { + CVK_ASSERT(!is_backed_by_buffer_view()); + return m_sampled_view; + } + VkImageView vulkan_storage_view() const { + CVK_ASSERT(!is_backed_by_buffer_view()); + return m_storage_view; + } + VkBufferView vulkan_buffer_view() const { + CVK_ASSERT(is_backed_by_buffer_view()); + return m_buffer_view; + } const cl_image_format& format() const { return m_format; } size_t element_size() const { switch (m_format.image_channel_data_type) { @@ -693,6 +721,8 @@ struct cvk_image : public cvk_mem { size_t* size_ret) const; private: + bool init_vulkan_image(); + bool init_vulkan_texel_buffer(); bool init(); size_t num_channels() const { @@ -746,6 +776,7 @@ struct cvk_image : public cvk_mem { VkImage m_image; VkImageView m_sampled_view; VkImageView m_storage_view; + VkBufferView m_buffer_view; std::unordered_map> m_mappings; std::mutex m_mappings_lock; std::unique_ptr m_init_data; diff --git a/src/program.cpp b/src/program.cpp index b0390fab..9e538dcd 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -117,6 +117,10 @@ spv_result_t parse_reflection(void* user_data, return kernel_argument_kind::sampled_image; case NonSemanticClspvReflectionArgumentStorageImage: return kernel_argument_kind::storage_image; + case NonSemanticClspvReflectionArgumentStorageTexelBuffer: + return kernel_argument_kind::storage_texel_buffer; + case NonSemanticClspvReflectionArgumentUniformTexelBuffer: + return kernel_argument_kind::uniform_texel_buffer; case NonSemanticClspvReflectionArgumentSampler: return kernel_argument_kind::sampler; case NonSemanticClspvReflectionArgumentWorkgroup: @@ -234,6 +238,8 @@ spv_result_t parse_reflection(void* user_data, case NonSemanticClspvReflectionArgumentUniform: case NonSemanticClspvReflectionArgumentSampledImage: case NonSemanticClspvReflectionArgumentStorageImage: + case NonSemanticClspvReflectionArgumentStorageTexelBuffer: + case NonSemanticClspvReflectionArgumentUniformTexelBuffer: case NonSemanticClspvReflectionArgumentSampler: { // These arguments have descriptor set, binding and an optional // arg info. @@ -1743,6 +1749,12 @@ bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_arguments( case kernel_argument_kind::storage_image: dt = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; break; + case kernel_argument_kind::storage_texel_buffer: + dt = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + break; + case kernel_argument_kind::uniform_texel_buffer: + dt = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + break; case kernel_argument_kind::sampler: dt = VK_DESCRIPTOR_TYPE_SAMPLER; break; diff --git a/src/program.hpp b/src/program.hpp index 8df31c7e..7a65bc35 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -47,6 +47,8 @@ enum class kernel_argument_kind pointer_pushconstant, sampled_image, storage_image, + storage_texel_buffer, + uniform_texel_buffer, sampler, local, unused, @@ -101,7 +103,9 @@ struct kernel_argument { return (kind == kernel_argument_kind::buffer) || (kind == kernel_argument_kind::buffer_ubo) || (kind == kernel_argument_kind::sampled_image) || - (kind == kernel_argument_kind::storage_image); + (kind == kernel_argument_kind::storage_image) || + (kind == kernel_argument_kind::storage_texel_buffer) || + (kind == kernel_argument_kind::uniform_texel_buffer); } bool is_unused() const { return kind == kernel_argument_kind::unused; } diff --git a/src/queue.hpp b/src/queue.hpp index 4acd0e5e..134e8d55 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -1141,7 +1141,9 @@ struct cvk_command_image_init final : public cvk_command_batchable { cvk_command_image_init(cvk_command_queue* queue, cvk_image* image) : cvk_command_batchable(CLVK_COMMAND_IMAGE_INIT, queue), - m_image(image) {} + m_image(image) { + CVK_ASSERT(!m_image->is_backed_by_buffer_view()); + } bool is_data_movement() const override { return true; } CHECK_RETURN cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) override final; From 1744028e1dfe587d9dc081aafe1ae8e0a6de4c4e Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Tue, 8 Aug 2023 15:19:12 +0200 Subject: [PATCH 2/6] add support for image1d_buffer remove 1DBUFFER mention in init_vulkan_image limit CL_DEVICE_IMAGE_MAX_BUFFER_SIZE WITH CL_DEVICE_MAX_MEM_ALLOC_SIZE --- src/api.cpp | 248 ++++++++++++++++++++++++++++++++++++------------- src/memory.cpp | 35 +++---- src/memory.hpp | 13 +-- 3 files changed, 206 insertions(+), 90 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index 9866f13e..9bcbbf00 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -710,7 +710,9 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev, size_ret = sizeof(val_uint); break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: - val_sizet = device->vulkan_limits().maxTexelBufferElements; + val_sizet = + std::min((uint64_t)device->vulkan_limits().maxTexelBufferElements, + device->max_mem_alloc_size()); copy_ptr = &val_sizet; size_ret = sizeof(val_sizet); break; @@ -3718,6 +3720,32 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferRect( cmd, num_events_in_wait_list, event_wait_list, event); } +void* cvk_enqueue_map_buffer(cvk_command_queue* cq, cvk_buffer* buffer, + cl_bool blocking_map, size_t offset, size_t size, + cl_map_flags map_flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_int* errcode_ret) { + auto cmd = new cvk_command_map_buffer(cq, buffer, offset, size, map_flags); + + void* map_ptr; + cl_int err = cmd->build(&map_ptr); + + // FIXME This error cannot occur for objects created with + // CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR. + if (err != CL_SUCCESS) { + *errcode_ret = CL_MAP_FAILURE; + return nullptr; + } + + err = cq->enqueue_command_with_deps( + cmd, blocking_map, num_events_in_wait_list, event_wait_list, event); + + *errcode_ret = err; + + return map_ptr; +} + void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, @@ -3799,23 +3827,10 @@ void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf, return nullptr; } - auto cmd = new cvk_command_map_buffer(command_queue, buffer, offset, size, - map_flags); - - void* map_ptr; - cl_int err = cmd->build(&map_ptr); - - // FIXME This error cannot occur for objects created with - // CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR. - if (err != CL_SUCCESS) { - if (errcode_ret != nullptr) { - *errcode_ret = CL_MAP_FAILURE; - } - return nullptr; - } - - err = command_queue->enqueue_command_with_deps( - cmd, blocking_map, num_events_in_wait_list, event_wait_list, event); + cl_int err; + auto map_ptr = cvk_enqueue_map_buffer( + command_queue, buffer, blocking_map, offset, size, map_flags, + num_events_in_wait_list, event_wait_list, event, &err); if (errcode_ret != nullptr) { *errcode_ret = err; @@ -3849,14 +3864,20 @@ cl_int CLVK_API_CALL clEnqueueUnmapMemObject(cl_command_queue cq, cl_mem mem, if (memobj->is_image_type()) { auto image = static_cast(memobj); - auto cmd_unmap = std::make_unique( - command_queue, image, mapped_ptr, true); + if (image->is_backed_by_buffer_view()) { + auto buffer = static_cast(image->buffer()); + cmd = + new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr); + } else { + auto cmd_unmap = std::make_unique( + command_queue, image, mapped_ptr, true); - auto err = cmd_unmap->build(); - if (err != CL_SUCCESS) { - return err; + auto err = cmd_unmap->build(); + if (err != CL_SUCCESS) { + return err; + } + cmd = cmd_unmap.release(); } - cmd = cmd_unmap.release(); } else { auto buffer = static_cast(memobj); cmd = new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr); @@ -4820,9 +4841,9 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, " buffer : %s", vulkan_format_features_string(properties.bufferFeatures).c_str()); - cvk_debug( - "Required format features %s", - vulkan_format_features_string(required_format_feature_flags).c_str()); + cvk_debug("Required format features %s", + vulkan_format_features_string(required_format_feature_flags) + .c_str()); VkFormatFeatureFlags features; if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { features = properties.bufferFeatures; @@ -4832,11 +4853,10 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context, } if ((features & required_format_feature_flags) == required_format_feature_flags) { - const cl_image_format& clfmt = clvkfmt.first; VkComponentMapping components_sampled, components_storage; - get_component_mappings_for_channel_order( - clfmt.image_channel_order, &components_sampled, - &components_storage); + get_component_mappings_for_channel_order(clfmt.image_channel_order, + &components_sampled, + &components_storage); if ((components_sampled != components_storage) && (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { continue; @@ -4985,10 +5005,23 @@ cl_int CLVK_API_CALL clEnqueueReadImage( return CL_INVALID_OPERATION; } - return cvk_enqueue_image_copy(command_queue, CL_COMMAND_READ_IMAGE, image, - blocking_read, origin, region, row_pitch, - slice_pitch, ptr, num_events_in_wait_list, - event_wait_list, event); + auto internal_image = static_cast(image); + if (internal_image->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_buffer_host_copy( + command_queue, CL_COMMAND_READ_BUFFER, + static_cast(internal_image->buffer()), ptr, + origin[0] * internal_image->element_size(), + region[0] * internal_image->element_size()); + auto err = command_queue->enqueue_command_with_deps( + cmd, blocking_read, num_events_in_wait_list, event_wait_list, + event); + return err; + } else { + return cvk_enqueue_image_copy( + command_queue, CL_COMMAND_READ_IMAGE, image, blocking_read, origin, + region, row_pitch, slice_pitch, ptr, num_events_in_wait_list, + event_wait_list, event); + } } cl_int CLVK_API_CALL clEnqueueWriteImage( @@ -5048,11 +5081,24 @@ cl_int CLVK_API_CALL clEnqueueWriteImage( if (image->has_any_flag(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { return CL_INVALID_OPERATION; } - - return cvk_enqueue_image_copy( - command_queue, CL_COMMAND_WRITE_IMAGE, image, blocking_write, origin, - region, input_row_pitch, input_slice_pitch, const_cast(ptr), - num_events_in_wait_list, event_wait_list, event); + auto internal_image = static_cast(image); + if (internal_image->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_buffer_host_copy( + command_queue, CL_COMMAND_WRITE_BUFFER, + static_cast(internal_image->buffer()), ptr, + origin[0] * internal_image->element_size(), + region[0] * internal_image->element_size()); + auto err = command_queue->enqueue_command_with_deps( + cmd, blocking_write, num_events_in_wait_list, event_wait_list, + event); + return err; + } else { + return cvk_enqueue_image_copy( + command_queue, CL_COMMAND_WRITE_IMAGE, image, blocking_write, + origin, region, input_row_pitch, input_slice_pitch, + const_cast(ptr), num_events_in_wait_list, event_wait_list, + event); + } } cl_int CLVK_API_CALL @@ -5122,6 +5168,7 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image, if (!src_img->has_same_format(dst_img)) { return CL_IMAGE_FORMAT_MISMATCH; } + assert(src_img->element_size() == dst_img->element_size()); std::array src_orig = {src_origin[0], src_origin[1], src_origin[2]}; @@ -5129,11 +5176,41 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image, dst_origin[2]}; std::array reg = {region[0], region[1], region[2]}; - auto cmd = std::make_unique( - command_queue, src_img, dst_img, src_orig, dst_orig, reg); + if (src_img->is_backed_by_buffer_view() && + dst_img->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_copy_buffer( + command_queue, CL_COMMAND_COPY_BUFFER, + static_cast(src_img->buffer()), + static_cast(dst_img->buffer()), + src_origin[0] * src_img->element_size(), + dst_origin[0] * dst_img->element_size(), + region[0] * src_img->element_size()); + + return command_queue->enqueue_command_with_deps( + cmd, num_events_in_wait_list, event_wait_list, event); + } else if (src_img->is_backed_by_buffer_view()) { + auto cmd = std::make_unique( + CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, + static_cast(src_img->buffer()), dst_img, + src_origin[0] * src_img->element_size(), dst_orig, reg); + + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); + } else if (dst_img->is_backed_by_buffer_view()) { + auto cmd = std::make_unique( + CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, + static_cast(dst_img->buffer()), src_img, + dst_origin[0] * dst_img->element_size(), src_orig, reg); + + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); + } else { + auto cmd = std::make_unique( + command_queue, src_img, dst_img, src_orig, dst_orig, reg); - return command_queue->enqueue_command_with_deps( - cmd.release(), num_events_in_wait_list, event_wait_list, event); + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); + } } cl_int CLVK_API_CALL clEnqueueFillImage( @@ -5188,11 +5265,27 @@ cl_int CLVK_API_CALL clEnqueueFillImage( // TODO use Vulkan clear commands when possible // TODO use a shader when better + auto img = static_cast(image); + + // Fill + cvk_image::fill_pattern_array pattern; + size_t pattern_size; + img->prepare_fill_pattern(fill_color, pattern, &pattern_size); + + if (img->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_fill_buffer( + command_queue, static_cast(img->buffer()), + origin[0] * img->element_size(), region[0] * img->element_size(), + pattern.data(), pattern_size); + + return command_queue->enqueue_command_with_deps( + cmd, num_events_in_wait_list, event_wait_list, event); + } + // Create image map command std::array orig = {origin[0], origin[1], origin[2]}; std::array reg = {region[0], region[1], region[2]}; - auto img = static_cast(image); auto cmd_map = std::make_unique( command_queue, img, orig, reg, CL_MAP_WRITE_INVALIDATE_REGION); void* map_ptr; @@ -5201,11 +5294,6 @@ cl_int CLVK_API_CALL clEnqueueFillImage( return err; } - // Fill - cvk_image::fill_pattern_array pattern; - size_t pattern_size; - img->prepare_fill_pattern(fill_color, pattern, &pattern_size); - auto cmd_fill = std::make_unique( command_queue, map_ptr, pattern, pattern_size, reg); @@ -5299,12 +5387,23 @@ cl_int CLVK_API_CALL clEnqueueCopyImageToBuffer( src_origin[2]}; std::array reg = {region[0], region[1], region[2]}; - auto cmd = std::make_unique( - CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, buffer, image, - dst_offset, origin, reg); + if (image->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_copy_buffer( + command_queue, CL_COMMAND_COPY_BUFFER, + static_cast(image->buffer()), buffer, + src_origin[0] * image->element_size(), dst_offset, + region[0] * image->element_size()); - return command_queue->enqueue_command_with_deps( - cmd.release(), num_events_in_wait_list, event_wait_list, event); + return command_queue->enqueue_command_with_deps( + cmd, num_events_in_wait_list, event_wait_list, event); + } else { + auto cmd = std::make_unique( + CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, buffer, image, + dst_offset, origin, reg); + + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); + } } cl_int CLVK_API_CALL clEnqueueCopyBufferToImage( @@ -5375,12 +5474,23 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferToImage( dst_origin[2]}; std::array reg = {region[0], region[1], region[2]}; - auto cmd = std::make_unique( - CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, buffer, image, - src_offset, origin, reg); + if (image->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_copy_buffer( + command_queue, CL_COMMAND_COPY_BUFFER, buffer, + static_cast(image->buffer()), src_offset, + dst_origin[0] * image->element_size(), + region[0] * image->element_size()); - return command_queue->enqueue_command_with_deps( - cmd.release(), num_events_in_wait_list, event_wait_list, event); + return command_queue->enqueue_command_with_deps( + cmd, num_events_in_wait_list, event_wait_list, event); + } else { + auto cmd = std::make_unique( + CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, buffer, image, + src_offset, origin, reg); + + return command_queue->enqueue_command_with_deps( + cmd.release(), num_events_in_wait_list, event_wait_list, event); + } } void* cvk_enqueue_map_image(cl_command_queue cq, cl_mem img, @@ -5519,10 +5629,20 @@ void* CLVK_API_CALL clEnqueueMapImage( auto command_queue = icd_downcast(cq); cl_int err; - auto ret = cvk_enqueue_map_image(command_queue, image, blocking_map, - map_flags, origin, region, image_row_pitch, - image_slice_pitch, num_events_in_wait_list, - event_wait_list, event, &err); + void* ret; + auto img = static_cast(image); + if (img->is_backed_by_buffer_view()) { + ret = cvk_enqueue_map_buffer( + command_queue, static_cast(img->buffer()), + blocking_map, origin[0] * img->element_size(), + region[0] * img->element_size(), map_flags, num_events_in_wait_list, + event_wait_list, event, &err); + } else { + ret = cvk_enqueue_map_image(command_queue, image, blocking_map, + map_flags, origin, region, image_row_pitch, + image_slice_pitch, num_events_in_wait_list, + event_wait_list, event, &err); + } if (errcode_ret != nullptr) { *errcode_ret = err; diff --git a/src/memory.cpp b/src/memory.cpp index f9017413..639cabba 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -407,29 +407,24 @@ bool cvk_image::init_vulkan_image() { return false; } - if (m_desc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { - auto buffer = static_cast(m_desc.buffer); - m_memory = buffer->memory(); - buffer->retain(); - } else { - // Select memory type - cvk_device::allocation_parameters params = - device->select_memory_for(m_image); - if (params.memory_type_index == VK_MAX_MEMORY_TYPES) { - cvk_error_fn("Could not get memory type!"); - return false; - } + CVK_ASSERT(m_desc.image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER); + // Select memory type + cvk_device::allocation_parameters params = + device->select_memory_for(m_image); + if (params.memory_type_index == VK_MAX_MEMORY_TYPES) { + cvk_error_fn("Could not get memory type!"); + return false; + } - // Allocate memory - m_memory = std::make_unique( - vkdev, params.size, params.memory_type_index); + // Allocate memory + m_memory = std::make_unique( + vkdev, params.size, params.memory_type_index); - res = m_memory->allocate(device->uses_physical_addressing()); + res = m_memory->allocate(device->uses_physical_addressing()); - if (res != VK_SUCCESS) { - cvk_error_fn("Could not allocate memory!"); - return false; - } + if (res != VK_SUCCESS) { + cvk_error_fn("Could not allocate memory!"); + return false; } // Bind the image to memory diff --git a/src/memory.hpp b/src/memory.hpp index 0d71a6ad..987a1e34 100644 --- a/src/memory.hpp +++ b/src/memory.hpp @@ -314,12 +314,13 @@ struct cvk_buffer : public cvk_mem { cvk_mem* create_subbuffer(cl_mem_flags, size_t origin, size_t size); VkBufferUsageFlags prepare_usage_flags() { - VkBufferUsageFlags usage_flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + VkBufferUsageFlags usage_flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; if (m_context->device()->uses_physical_addressing()) { usage_flags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; } From 14766e4fbf1b8be3c51b2e2dac127af865f52cad Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 16 Oct 2023 11:30:46 +0200 Subject: [PATCH 3/6] kpet feedback --- src/api.cpp | 92 ++++++++++++++++++-------------------------- src/device.hpp | 5 +++ src/memory.cpp | 3 +- src/queue.cpp | 4 +- src/queue.hpp | 25 ++++++++---- tests/api/images.cpp | 57 +++++++++++++++++++++++++-- tests/api/testcl.hpp | 27 +++++++++++++ 7 files changed, 146 insertions(+), 67 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index 9bcbbf00..6ab4c9c9 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -710,9 +710,7 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev, size_ret = sizeof(val_uint); break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: - val_sizet = - std::min((uint64_t)device->vulkan_limits().maxTexelBufferElements, - device->max_mem_alloc_size()); + val_sizet = device->image_max_buffer_size(); copy_ptr = &val_sizet; size_ret = sizeof(val_sizet); break; @@ -3577,9 +3575,9 @@ cl_int CLVK_API_CALL clEnqueueFillBuffer( // TODO check sub-buffer alignment - auto cmd = new cvk_command_fill_buffer(command_queue, - static_cast(buffer), - offset, size, pattern, pattern_size); + auto cmd = new cvk_command_fill_buffer( + command_queue, static_cast(buffer), offset, size, pattern, + pattern_size, CL_COMMAND_FILL_BUFFER); return command_queue->enqueue_command_with_deps( cmd, num_events_in_wait_list, event_wait_list, event); @@ -3725,8 +3723,9 @@ void* cvk_enqueue_map_buffer(cvk_command_queue* cq, cvk_buffer* buffer, cl_map_flags map_flags, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event, - cl_int* errcode_ret) { - auto cmd = new cvk_command_map_buffer(cq, buffer, offset, size, map_flags); + cl_int* errcode_ret, cl_command_type type) { + auto cmd = + new cvk_command_map_buffer(cq, buffer, offset, size, map_flags, type); void* map_ptr; cl_int err = cmd->build(&map_ptr); @@ -3830,7 +3829,8 @@ void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf, cl_int err; auto map_ptr = cvk_enqueue_map_buffer( command_queue, buffer, blocking_map, offset, size, map_flags, - num_events_in_wait_list, event_wait_list, event, &err); + num_events_in_wait_list, event_wait_list, event, &err, + CL_COMMAND_MAP_BUFFER); if (errcode_ret != nullptr) { *errcode_ret = err; @@ -4890,6 +4890,16 @@ cl_int cvk_enqueue_image_copy( size_t slice_pitch, void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) { + auto img = static_cast(image); + if (img->is_backed_by_buffer_view()) { + auto cmd = new cvk_command_buffer_host_copy( + queue, command_type, static_cast(img->buffer()), ptr, + origin[0] * img->element_size(), region[0] * img->element_size()); + auto err = queue->enqueue_command_with_deps( + cmd, blocking, num_events_in_wait_list, event_wait_list, event); + return err; + } + // Create image map command std::array orig = {origin[0], origin[1], origin[2]}; std::array reg = {region[0], region[1], region[2]}; @@ -4901,7 +4911,6 @@ cl_int cvk_enqueue_image_copy( map_flags = CL_MAP_READ; } - auto img = static_cast(image); auto cmd_map = std::make_unique(queue, img, orig, reg, map_flags); void* map_ptr; @@ -5005,23 +5014,10 @@ cl_int CLVK_API_CALL clEnqueueReadImage( return CL_INVALID_OPERATION; } - auto internal_image = static_cast(image); - if (internal_image->is_backed_by_buffer_view()) { - auto cmd = new cvk_command_buffer_host_copy( - command_queue, CL_COMMAND_READ_BUFFER, - static_cast(internal_image->buffer()), ptr, - origin[0] * internal_image->element_size(), - region[0] * internal_image->element_size()); - auto err = command_queue->enqueue_command_with_deps( - cmd, blocking_read, num_events_in_wait_list, event_wait_list, - event); - return err; - } else { - return cvk_enqueue_image_copy( - command_queue, CL_COMMAND_READ_IMAGE, image, blocking_read, origin, - region, row_pitch, slice_pitch, ptr, num_events_in_wait_list, - event_wait_list, event); - } + return cvk_enqueue_image_copy(command_queue, CL_COMMAND_READ_IMAGE, image, + blocking_read, origin, region, row_pitch, + slice_pitch, ptr, num_events_in_wait_list, + event_wait_list, event); } cl_int CLVK_API_CALL clEnqueueWriteImage( @@ -5081,24 +5077,11 @@ cl_int CLVK_API_CALL clEnqueueWriteImage( if (image->has_any_flag(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { return CL_INVALID_OPERATION; } - auto internal_image = static_cast(image); - if (internal_image->is_backed_by_buffer_view()) { - auto cmd = new cvk_command_buffer_host_copy( - command_queue, CL_COMMAND_WRITE_BUFFER, - static_cast(internal_image->buffer()), ptr, - origin[0] * internal_image->element_size(), - region[0] * internal_image->element_size()); - auto err = command_queue->enqueue_command_with_deps( - cmd, blocking_write, num_events_in_wait_list, event_wait_list, - event); - return err; - } else { - return cvk_enqueue_image_copy( - command_queue, CL_COMMAND_WRITE_IMAGE, image, blocking_write, - origin, region, input_row_pitch, input_slice_pitch, - const_cast(ptr), num_events_in_wait_list, event_wait_list, - event); - } + + return cvk_enqueue_image_copy( + command_queue, CL_COMMAND_WRITE_IMAGE, image, blocking_write, origin, + region, input_row_pitch, input_slice_pitch, const_cast(ptr), + num_events_in_wait_list, event_wait_list, event); } cl_int CLVK_API_CALL @@ -5168,7 +5151,6 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image, if (!src_img->has_same_format(dst_img)) { return CL_IMAGE_FORMAT_MISMATCH; } - assert(src_img->element_size() == dst_img->element_size()); std::array src_orig = {src_origin[0], src_origin[1], src_origin[2]}; @@ -5179,7 +5161,7 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image, if (src_img->is_backed_by_buffer_view() && dst_img->is_backed_by_buffer_view()) { auto cmd = new cvk_command_copy_buffer( - command_queue, CL_COMMAND_COPY_BUFFER, + command_queue, CL_COMMAND_COPY_IMAGE, static_cast(src_img->buffer()), static_cast(dst_img->buffer()), src_origin[0] * src_img->element_size(), @@ -5190,16 +5172,16 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image, cmd, num_events_in_wait_list, event_wait_list, event); } else if (src_img->is_backed_by_buffer_view()) { auto cmd = std::make_unique( - CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, - static_cast(src_img->buffer()), dst_img, + CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_BUFFER_TO_IMAGE, + command_queue, static_cast(src_img->buffer()), dst_img, src_origin[0] * src_img->element_size(), dst_orig, reg); return command_queue->enqueue_command_with_deps( cmd.release(), num_events_in_wait_list, event_wait_list, event); } else if (dst_img->is_backed_by_buffer_view()) { auto cmd = std::make_unique( - CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, - static_cast(dst_img->buffer()), src_img, + CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER, + command_queue, static_cast(dst_img->buffer()), src_img, dst_origin[0] * dst_img->element_size(), src_orig, reg); return command_queue->enqueue_command_with_deps( @@ -5276,7 +5258,7 @@ cl_int CLVK_API_CALL clEnqueueFillImage( auto cmd = new cvk_command_fill_buffer( command_queue, static_cast(img->buffer()), origin[0] * img->element_size(), region[0] * img->element_size(), - pattern.data(), pattern_size); + pattern.data(), pattern_size, CL_COMMAND_FILL_IMAGE); return command_queue->enqueue_command_with_deps( cmd, num_events_in_wait_list, event_wait_list, event); @@ -5389,7 +5371,7 @@ cl_int CLVK_API_CALL clEnqueueCopyImageToBuffer( if (image->is_backed_by_buffer_view()) { auto cmd = new cvk_command_copy_buffer( - command_queue, CL_COMMAND_COPY_BUFFER, + command_queue, CL_COMMAND_COPY_IMAGE_TO_BUFFER, static_cast(image->buffer()), buffer, src_origin[0] * image->element_size(), dst_offset, region[0] * image->element_size()); @@ -5476,7 +5458,7 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferToImage( if (image->is_backed_by_buffer_view()) { auto cmd = new cvk_command_copy_buffer( - command_queue, CL_COMMAND_COPY_BUFFER, buffer, + command_queue, CL_COMMAND_COPY_BUFFER_TO_IMAGE, buffer, static_cast(image->buffer()), src_offset, dst_origin[0] * image->element_size(), region[0] * image->element_size()); @@ -5636,7 +5618,7 @@ void* CLVK_API_CALL clEnqueueMapImage( command_queue, static_cast(img->buffer()), blocking_map, origin[0] * img->element_size(), region[0] * img->element_size(), map_flags, num_events_in_wait_list, - event_wait_list, event, &err); + event_wait_list, event, &err, CL_COMMAND_MAP_IMAGE); } else { ret = cvk_enqueue_map_image(command_queue, image, blocking_map, map_flags, origin, region, image_row_pitch, diff --git a/src/device.hpp b/src/device.hpp index d988b3ec..91ed7bc3 100644 --- a/src/device.hpp +++ b/src/device.hpp @@ -248,6 +248,11 @@ struct cvk_device : public _cl_device_id, return maxAllocSz; } + size_t image_max_buffer_size() const { + return std::min((uint64_t)vulkan_limits().maxTexelBufferElements, + max_mem_alloc_size()); + } + cl_uint mem_base_addr_align() const { // The OpenCL spec requires at least 1024 bits (long16's alignment) uint32_t required_by_vulkan_impl = diff --git a/src/memory.cpp b/src/memory.cpp index 639cabba..016af67a 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -509,6 +509,7 @@ bool cvk_image::init_vulkan_texel_buffer() { CVK_ASSERT(buffer()->is_buffer_type()); auto vkbuf = static_cast(buffer())->vulkan_buffer(); + auto offset = static_cast(buffer())->vulkan_buffer_offset(); VkBufferViewCreateInfo createInfo = { VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, @@ -516,7 +517,7 @@ bool cvk_image::init_vulkan_texel_buffer() { 0, // flags vkbuf, // buffer format, // format - 0, // offset + offset, // offset VK_WHOLE_SIZE // range }; diff --git a/src/queue.cpp b/src/queue.cpp index 18ca429b..536347fc 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -1192,9 +1192,11 @@ cl_int cvk_command_buffer_host_copy::do_action() { bool success = false; switch (m_type) { + case CL_COMMAND_WRITE_IMAGE: case CL_COMMAND_WRITE_BUFFER: success = m_buffer->copy_from(m_ptr, m_offset, m_size); break; + case CL_COMMAND_READ_IMAGE: case CL_COMMAND_READ_BUFFER: success = m_buffer->copy_to(m_ptr, m_offset, m_size); break; @@ -1669,7 +1671,7 @@ cl_int cvk_command_buffer_image_copy::build_batchable_inner( VkBufferImageCopy region = prepare_buffer_image_copy(m_image, m_offset, m_origin, m_region); - switch (type()) { + switch (m_copy_type) { case CL_COMMAND_COPY_IMAGE_TO_BUFFER: case CL_COMMAND_MAP_IMAGE: build_inner_image_to_buffer(cmdbuf, region); diff --git a/src/queue.hpp b/src/queue.hpp index 134e8d55..329fed32 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -627,9 +627,8 @@ struct cvk_command_fill_buffer final : public cvk_command_buffer_base_region { cvk_command_fill_buffer(cvk_command_queue* q, cvk_buffer* buffer, size_t offset, size_t size, const void* pattern, - size_t pattern_size) - : cvk_command_buffer_base_region(q, CL_COMMAND_FILL_BUFFER, buffer, - offset, size), + size_t pattern_size, cl_command_type type) + : cvk_command_buffer_base_region(q, type, buffer, offset, size), m_pattern_size(pattern_size) { memcpy(m_pattern.data(), pattern, pattern_size); } @@ -883,9 +882,9 @@ struct cvk_command_batch : public cvk_command { struct cvk_command_map_buffer final : public cvk_command_buffer_base_region { cvk_command_map_buffer(cvk_command_queue* queue, cvk_buffer* buffer, - size_t offset, size_t size, cl_map_flags flags) - : cvk_command_buffer_base_region(queue, CL_COMMAND_MAP_BUFFER, buffer, - offset, size), + size_t offset, size_t size, cl_map_flags flags, + cl_command_type type) + : cvk_command_buffer_base_region(queue, type, buffer, offset, size), m_flags(flags), m_mapping_needs_releasing_on_destruction(false) {} ~cvk_command_map_buffer() { if (m_mapping_needs_releasing_on_destruction) { @@ -937,7 +936,18 @@ struct cvk_command_buffer_image_copy final : public cvk_command_batchable { const std::array& origin, const std::array& region) : cvk_command_batchable(type, queue), m_buffer(buffer), m_image(image), - m_offset(offset), m_origin(origin), m_region(region) {} + m_offset(offset), m_origin(origin), m_region(region), + m_copy_type(type) {} + + cvk_command_buffer_image_copy(cl_command_type type, + cl_command_type copy_type, + cvk_command_queue* queue, cvk_buffer* buffer, + cvk_image* image, size_t offset, + const std::array& origin, + const std::array& region) + : cvk_command_batchable(type, queue), m_buffer(buffer), m_image(image), + m_offset(offset), m_origin(origin), m_region(region), + m_copy_type(copy_type) {} CHECK_RETURN cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) override final; @@ -957,6 +967,7 @@ struct cvk_command_buffer_image_copy final : public cvk_command_batchable { size_t m_offset; std::array m_origin; std::array m_region; + cl_command_type m_copy_type; }; struct cvk_command_combine final : public cvk_command { diff --git a/tests/api/images.cpp b/tests/api/images.cpp index 1c1427f4..107e3e51 100644 --- a/tests/api/images.cpp +++ b/tests/api/images.cpp @@ -403,17 +403,16 @@ TEST_F(WithCommandQueue, ImageWriteOffset) { } TEST_F(WithContext, Image1DBuffer) { - const size_t IMAGE_HEIGHT = 128; const size_t IMAGE_WIDTH = 128; - auto buffer_size = IMAGE_HEIGHT * IMAGE_WIDTH * sizeof(cl_float4); + auto buffer_size = IMAGE_WIDTH * sizeof(cl_float4); auto buffer = CreateBuffer(CL_MEM_READ_WRITE, buffer_size, nullptr); cl_image_format format = {CL_RGBA, CL_FLOAT}; cl_image_desc desc = { CL_MEM_OBJECT_IMAGE1D_BUFFER, // image_type IMAGE_WIDTH, // image_width - IMAGE_HEIGHT, // image_height + 1, // image_height 1, // image_depth 1, // image_array_size 0, // image_row_pitch @@ -594,3 +593,55 @@ kernel void test(global uint* dst, uint magic, image2d_t read_only image, uint o (dst[2] == (offset + magic))); } } + +TEST_F(WithCommandQueue, 1DBufferImageFromSubBuffer) { + const size_t IMAGE_WIDTH = 128; + + auto subbuffer_size = IMAGE_WIDTH * sizeof(cl_float4); + auto buffer_size = subbuffer_size + 2 * sizeof(cl_float4); + auto buffer = CreateBuffer(CL_MEM_READ_WRITE, buffer_size, nullptr); + auto subbuffer = + CreateSubBuffer(buffer, 0, sizeof(cl_float4), subbuffer_size); + + cl_image_format format = {CL_RGBA, CL_FLOAT}; + cl_image_desc desc = { + CL_MEM_OBJECT_IMAGE1D_BUFFER, // image_type + IMAGE_WIDTH, // image_width + 1, // image_height + 1, // image_depth + 1, // image_array_size + 0, // image_row_pitch + 0, // image_slice_pitch + 0, // num_mip_levels + 0, // num_samples + subbuffer, // buffer + }; + + auto image = CreateImage(CL_MEM_READ_WRITE, &format, &desc); + + const char* source = R"( +kernel void test(image1d_buffer_t write_only image) +{ + int gid = get_global_id(0); + write_imagef(image, gid, (float4)(0.0, 0.0, 0.0, 0.0)); +} +)"; + auto kernel = CreateKernel(source, "test"); + SetKernelArg(kernel, 0, image); + cl_float pattern = -42.0; + EnqueueFillBuffer(buffer, &pattern, sizeof(pattern), buffer_size); + EnqueueNDRangeKernel(kernel, 1, nullptr, &IMAGE_WIDTH, nullptr); + + cl_float4 output[IMAGE_WIDTH + 2]; + EnqueueReadBuffer(buffer, CL_TRUE, 0, buffer_size, output); + EXPECT_TRUE(output[0].s0 == pattern && output[0].s1 == pattern && + output[0].s2 == pattern && output[0].s3 == pattern && + output[IMAGE_WIDTH + 1].s0 == pattern && + output[IMAGE_WIDTH + 1].s1 == pattern && + output[IMAGE_WIDTH + 1].s2 == pattern && + output[IMAGE_WIDTH + 1].s3 == pattern); + for (unsigned i = 0; i < IMAGE_WIDTH; i++) { + EXPECT_TRUE(output[i + 1].s0 == 0.0 && output[i + 1].s1 == 0.0 && + output[i + 1].s2 == 0.0 && output[i + 1].s3 == 0.0); + } +} diff --git a/tests/api/testcl.hpp b/tests/api/testcl.hpp index 51040a2f..2966d455 100644 --- a/tests/api/testcl.hpp +++ b/tests/api/testcl.hpp @@ -405,6 +405,19 @@ class WithContext : public ::testing::Test { return mem; } + holder CreateSubBuffer(cl_mem buffer, cl_mem_flags flags, + const size_t region_origin, const size_t region_size) { + cl_int err; + cl_buffer_region buffer_region = { + .origin = region_origin, + .size = region_size, + }; + auto mem = clCreateSubBuffer( + buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &buffer_region, &err); + EXPECT_CL_SUCCESS(err); + return mem; + } + holder CreateSampler(cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode) { @@ -656,6 +669,20 @@ class WithCommandQueue : public WithContext { nullptr, nullptr); } + void EnqueueFillBuffer(cl_mem buffer, const void* pattern, + size_t pattern_size, size_t offset, size_t size) { + auto err = clEnqueueFillBuffer(m_queue, buffer, pattern, pattern_size, + offset, size, 0, nullptr, nullptr); + ASSERT_CL_SUCCESS(err); + } + + void EnqueueFillBuffer(cl_mem buffer, const void* pattern, + size_t pattern_size, size_t size) { + auto err = clEnqueueFillBuffer(m_queue, buffer, pattern, pattern_size, + 0, size, 0, nullptr, nullptr); + ASSERT_CL_SUCCESS(err); + } + void EnqueueReadImage(cl_mem image, cl_bool blocking_read, const size_t* origin, const size_t* region, size_t row_pitch, size_t slice_pitch, void* ptr, From 923bd444969c04efa110f6ec3a5aa0eb85947fb1 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 16 Oct 2023 16:44:26 +0200 Subject: [PATCH 4/6] fix formatting --- tests/api/testcl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/api/testcl.hpp b/tests/api/testcl.hpp index 2966d455..7dd1a73f 100644 --- a/tests/api/testcl.hpp +++ b/tests/api/testcl.hpp @@ -406,7 +406,8 @@ class WithContext : public ::testing::Test { } holder CreateSubBuffer(cl_mem buffer, cl_mem_flags flags, - const size_t region_origin, const size_t region_size) { + const size_t region_origin, + const size_t region_size) { cl_int err; cl_buffer_region buffer_region = { .origin = region_origin, From 7feed5f006e2db9bca0c1b5d3f11bc0d20a6de03 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Thu, 19 Oct 2023 18:10:19 +0200 Subject: [PATCH 5/6] update clspv --- external/clspv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/clspv b/external/clspv index 863f8a16..6bfb8fbc 160000 --- a/external/clspv +++ b/external/clspv @@ -1 +1 @@ -Subproject commit 863f8a16f8ff57d056d82b9ef25c0cf03fa2dd4d +Subproject commit 6bfb8fbc67507f27b111ae912ce03dd707096667 From 48be29588e7f7c1ef94efbb7e43571e819b4b2cb Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 23 Oct 2023 16:33:17 +0200 Subject: [PATCH 6/6] use bigger padding in test --- tests/api/images.cpp | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/tests/api/images.cpp b/tests/api/images.cpp index 107e3e51..4643f456 100644 --- a/tests/api/images.cpp +++ b/tests/api/images.cpp @@ -596,12 +596,16 @@ kernel void test(global uint* dst, uint magic, image2d_t read_only image, uint o TEST_F(WithCommandQueue, 1DBufferImageFromSubBuffer) { const size_t IMAGE_WIDTH = 128; + const unsigned long nb_prefix_elements = 16; + const unsigned long nb_suffix_elements = 16; auto subbuffer_size = IMAGE_WIDTH * sizeof(cl_float4); - auto buffer_size = subbuffer_size + 2 * sizeof(cl_float4); + auto buffer_size = + subbuffer_size + + (nb_prefix_elements + nb_suffix_elements) * sizeof(cl_float4); auto buffer = CreateBuffer(CL_MEM_READ_WRITE, buffer_size, nullptr); - auto subbuffer = - CreateSubBuffer(buffer, 0, sizeof(cl_float4), subbuffer_size); + auto subbuffer = CreateSubBuffer( + buffer, 0, nb_prefix_elements * sizeof(cl_float4), subbuffer_size); cl_image_format format = {CL_RGBA, CL_FLOAT}; cl_image_desc desc = { @@ -632,16 +636,20 @@ kernel void test(image1d_buffer_t write_only image) EnqueueFillBuffer(buffer, &pattern, sizeof(pattern), buffer_size); EnqueueNDRangeKernel(kernel, 1, nullptr, &IMAGE_WIDTH, nullptr); - cl_float4 output[IMAGE_WIDTH + 2]; + cl_float4 output[IMAGE_WIDTH + nb_prefix_elements + nb_suffix_elements]; EnqueueReadBuffer(buffer, CL_TRUE, 0, buffer_size, output); - EXPECT_TRUE(output[0].s0 == pattern && output[0].s1 == pattern && - output[0].s2 == pattern && output[0].s3 == pattern && - output[IMAGE_WIDTH + 1].s0 == pattern && - output[IMAGE_WIDTH + 1].s1 == pattern && - output[IMAGE_WIDTH + 1].s2 == pattern && - output[IMAGE_WIDTH + 1].s3 == pattern); - for (unsigned i = 0; i < IMAGE_WIDTH; i++) { - EXPECT_TRUE(output[i + 1].s0 == 0.0 && output[i + 1].s1 == 0.0 && - output[i + 1].s2 == 0.0 && output[i + 1].s3 == 0.0); + for (unsigned i = 0; i < nb_prefix_elements; i++) { + EXPECT_TRUE(output[i].s0 == pattern && output[i].s1 == pattern && + output[i].s2 == pattern && output[i].s3 == pattern); + } + for (unsigned i = nb_prefix_elements; i < IMAGE_WIDTH + nb_prefix_elements; + i++) { + EXPECT_TRUE(output[i].s0 == 0.0 && output[i].s1 == 0.0 && + output[i].s2 == 0.0 && output[i].s3 == 0.0); + } + for (unsigned i = IMAGE_WIDTH + nb_prefix_elements; + i < IMAGE_WIDTH + nb_prefix_elements + nb_suffix_elements; i++) { + EXPECT_TRUE(output[i].s0 == pattern && output[i].s1 == pattern && + output[i].s2 == pattern && output[i].s3 == pattern); } }