diff --git a/external/clspv b/external/clspv
index 863f8a16..6bfb8fbc 160000
--- a/external/clspv
+++ b/external/clspv
@@ -1 +1 @@
-Subproject commit 863f8a16f8ff57d056d82b9ef25c0cf03fa2dd4d
+Subproject commit 6bfb8fbc67507f27b111ae912ce03dd707096667
diff --git a/src/api.cpp b/src/api.cpp
index 91d201ad..6ab4c9c9 100644
--- a/src/api.cpp
+++ b/src/api.cpp
@@ -710,7 +710,7 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev,
         size_ret = sizeof(val_uint);
         break;
     case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE:
-        val_sizet = device->vulkan_limits().maxImageDimension1D;
+        val_sizet = device->image_max_buffer_size();
         copy_ptr = &val_sizet;
         size_ret = sizeof(val_sizet);
         break;
@@ -3575,9 +3575,9 @@ cl_int CLVK_API_CALL clEnqueueFillBuffer(

     // TODO check sub-buffer alignment

-    auto cmd = new cvk_command_fill_buffer(command_queue,
-                                           static_cast<cvk_buffer*>(buffer),
-                                           offset, size, pattern, pattern_size);
+    auto cmd = new cvk_command_fill_buffer(
+        command_queue, static_cast<cvk_buffer*>(buffer), offset, size, pattern,
+        pattern_size, CL_COMMAND_FILL_BUFFER);

     return command_queue->enqueue_command_with_deps(
         cmd, num_events_in_wait_list, event_wait_list, event);
@@ -3718,6 +3718,33 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferRect(
         cmd, num_events_in_wait_list, event_wait_list, event);
 }

+void* cvk_enqueue_map_buffer(cvk_command_queue* cq, cvk_buffer* buffer,
+                             cl_bool blocking_map, size_t offset, size_t size,
+                             cl_map_flags map_flags,
+                             cl_uint num_events_in_wait_list,
+                             const cl_event* event_wait_list, cl_event* event,
+                             cl_int* errcode_ret, cl_command_type type) {
+    auto cmd =
+        new cvk_command_map_buffer(cq, buffer, offset, size, map_flags, type);
+
+    void* map_ptr;
+    cl_int err = cmd->build(&map_ptr);
+
+    // FIXME This error cannot occur for objects created with
+    // CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.
+    if (err != CL_SUCCESS) {
+        *errcode_ret = CL_MAP_FAILURE;
+        return nullptr;
+    }
+
+    err = cq->enqueue_command_with_deps(
+        cmd, blocking_map, num_events_in_wait_list, event_wait_list, event);
+
+    *errcode_ret = err;
+
+    return map_ptr;
+}
+
 void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf,
                                        cl_bool blocking_map,
                                        cl_map_flags map_flags, size_t offset,
@@ -3799,23 +3826,11 @@ void* CLVK_API_CALL clEnqueueMapBuffer(cl_command_queue cq, cl_mem buf,
         return nullptr;
     }

-    auto cmd = new cvk_command_map_buffer(command_queue, buffer, offset, size,
-                                          map_flags);
-
-    void* map_ptr;
-    cl_int err = cmd->build(&map_ptr);
-
-    // FIXME This error cannot occur for objects created with
-    // CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.
-    if (err != CL_SUCCESS) {
-        if (errcode_ret != nullptr) {
-            *errcode_ret = CL_MAP_FAILURE;
-        }
-        return nullptr;
-    }
-
-    err = command_queue->enqueue_command_with_deps(
-        cmd, blocking_map, num_events_in_wait_list, event_wait_list, event);
+    cl_int err;
+    auto map_ptr = cvk_enqueue_map_buffer(
+        command_queue, buffer, blocking_map, offset, size, map_flags,
+        num_events_in_wait_list, event_wait_list, event, &err,
+        CL_COMMAND_MAP_BUFFER);

     if (errcode_ret != nullptr) {
         *errcode_ret = err;
@@ -3849,14 +3864,20 @@ cl_int CLVK_API_CALL clEnqueueUnmapMemObject(cl_command_queue cq, cl_mem mem,

     if (memobj->is_image_type()) {
         auto image = static_cast<cvk_image*>(memobj);
-        auto cmd_unmap = std::make_unique<cvk_command_unmap_image>(
-            command_queue, image, mapped_ptr, true);
+        if (image->is_backed_by_buffer_view()) {
+            auto buffer = static_cast<cvk_buffer*>(image->buffer());
+            cmd =
+                new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr);
+        } else {
+            auto cmd_unmap = std::make_unique<cvk_command_unmap_image>(
+                command_queue, image, mapped_ptr, true);

-        auto err = cmd_unmap->build();
-        if (err != CL_SUCCESS) {
-            return err;
+            auto err = cmd_unmap->build();
+            if (err != CL_SUCCESS) {
+                return err;
+            }
+            cmd = cmd_unmap.release();
         }
-        cmd = cmd_unmap.release();
     } else {
         auto buffer = static_cast<cvk_buffer*>(memobj);
         cmd = new cvk_command_unmap_buffer(command_queue, buffer, mapped_ptr);
@@ -4718,22 +4739,13 @@ std::unordered_map
+    auto img = static_cast<cvk_image*>(image);
+    if (img->is_backed_by_buffer_view()) {
+        auto cmd = new cvk_command_buffer_host_copy(
+            queue, command_type, static_cast<cvk_buffer*>(img->buffer()), ptr,
+            origin[0] * img->element_size(), region[0] * img->element_size());
+        auto err = queue->enqueue_command_with_deps(
+            cmd, blocking, num_events_in_wait_list, event_wait_list, event);
+        return err;
+    }
+
     // Create image map command
     std::array<size_t, 3> orig = {origin[0], origin[1], origin[2]};
     std::array<size_t, 3> reg = {region[0], region[1], region[2]};
@@ -4861,7 +4911,6 @@ cl_int cvk_enqueue_image_copy(
         map_flags = CL_MAP_READ;
     }

-    auto img = static_cast<cvk_image*>(image);
     auto cmd_map = std::make_unique<cvk_command_map_image>(queue, img, orig,
                                                            reg, map_flags);
     void* map_ptr;
@@ -5109,11 +5158,41 @@ clEnqueueCopyImage(cl_command_queue cq, cl_mem src_image, cl_mem dst_image,
                                       dst_origin[2]};
     std::array<size_t, 3> reg = {region[0], region[1], region[2]};

-    auto cmd = std::make_unique<cvk_command_image_image_copy>(
-        command_queue, src_img, dst_img, src_orig, dst_orig, reg);
+    if (src_img->is_backed_by_buffer_view() &&
+        dst_img->is_backed_by_buffer_view()) {
+        auto cmd = new cvk_command_copy_buffer(
+            command_queue, CL_COMMAND_COPY_IMAGE,
+            static_cast<cvk_buffer*>(src_img->buffer()),
+            static_cast<cvk_buffer*>(dst_img->buffer()),
+            src_origin[0] * src_img->element_size(),
+            dst_origin[0] * dst_img->element_size(),
+            region[0] * src_img->element_size());
+
+        return command_queue->enqueue_command_with_deps(
+            cmd, num_events_in_wait_list, event_wait_list, event);
+    } else if (src_img->is_backed_by_buffer_view()) {
+        auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
+            CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
+            command_queue, static_cast<cvk_buffer*>(src_img->buffer()), dst_img,
+            src_origin[0] * src_img->element_size(), dst_orig, reg);
+
+        return command_queue->enqueue_command_with_deps(
+            cmd.release(), num_events_in_wait_list, event_wait_list, event);
+    } else if (dst_img->is_backed_by_buffer_view()) {
+        auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
+            CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+            command_queue, static_cast<cvk_buffer*>(dst_img->buffer()), src_img,
+            dst_origin[0] * dst_img->element_size(), src_orig, reg);
+
+        return command_queue->enqueue_command_with_deps(
+            cmd.release(), num_events_in_wait_list, event_wait_list, event);
+    } else {
+        auto cmd = std::make_unique<cvk_command_image_image_copy>(
+            command_queue, src_img, dst_img, src_orig, dst_orig, reg);

-    return command_queue->enqueue_command_with_deps(
-        cmd.release(), num_events_in_wait_list, event_wait_list, event);
+        return command_queue->enqueue_command_with_deps(
+            cmd.release(), num_events_in_wait_list, event_wait_list, event);
+    }
 }

 cl_int CLVK_API_CALL clEnqueueFillImage(
@@ -5168,11 +5247,27 @@ cl_int CLVK_API_CALL clEnqueueFillImage(
     // TODO use Vulkan clear commands when possible
     // TODO use a shader when better

+    auto img = static_cast<cvk_image*>(image);
+
+    // Fill
+    cvk_image::fill_pattern_array pattern;
+    size_t pattern_size;
+    img->prepare_fill_pattern(fill_color, pattern, &pattern_size);
+
+    if (img->is_backed_by_buffer_view()) {
+        auto cmd = new cvk_command_fill_buffer(
+            command_queue, static_cast<cvk_buffer*>(img->buffer()),
+            origin[0] * img->element_size(), region[0] * img->element_size(),
+            pattern.data(), pattern_size, CL_COMMAND_FILL_IMAGE);
+
+        return command_queue->enqueue_command_with_deps(
+            cmd, num_events_in_wait_list, event_wait_list, event);
+    }
+
     // Create image map command
     std::array<size_t, 3> orig = {origin[0], origin[1], origin[2]};
     std::array<size_t, 3> reg = {region[0], region[1], region[2]};

-    auto img = static_cast<cvk_image*>(image);
     auto cmd_map = std::make_unique<cvk_command_map_image>(
         command_queue, img, orig, reg, CL_MAP_WRITE_INVALIDATE_REGION);
     void* map_ptr;
@@ -5181,11 +5276,6 @@ cl_int CLVK_API_CALL clEnqueueFillImage(
         return err;
     }

-    // Fill
-    cvk_image::fill_pattern_array pattern;
-    size_t pattern_size;
-    img->prepare_fill_pattern(fill_color, pattern, &pattern_size);
-
     auto cmd_fill = std::make_unique<cvk_command_fill_image>(
         command_queue, map_ptr, pattern, pattern_size, reg);

@@ -5279,12 +5369,23 @@ cl_int CLVK_API_CALL clEnqueueCopyImageToBuffer(
                                       src_origin[2]};
     std::array<size_t, 3> reg = {region[0], region[1], region[2]};

-    auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
-        CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, buffer, image,
-        dst_offset, origin, reg);
+    if (image->is_backed_by_buffer_view()) {
+        auto cmd = new cvk_command_copy_buffer(
+            command_queue, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+            static_cast<cvk_buffer*>(image->buffer()), buffer,
+            src_origin[0] * image->element_size(), dst_offset,
+            region[0] * image->element_size());

-    return command_queue->enqueue_command_with_deps(
-        cmd.release(), num_events_in_wait_list, event_wait_list, event);
+        return command_queue->enqueue_command_with_deps(
+            cmd, num_events_in_wait_list, event_wait_list, event);
+    } else {
+        auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
+            CL_COMMAND_COPY_IMAGE_TO_BUFFER, command_queue, buffer, image,
+            dst_offset, origin, reg);
+
+        return command_queue->enqueue_command_with_deps(
+            cmd.release(), num_events_in_wait_list, event_wait_list, event);
+    }
 }

 cl_int CLVK_API_CALL clEnqueueCopyBufferToImage(
@@ -5355,12 +5456,23 @@ cl_int CLVK_API_CALL clEnqueueCopyBufferToImage(
                                       dst_origin[2]};
     std::array<size_t, 3> reg = {region[0], region[1], region[2]};

-    auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
-        CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, buffer, image,
-        src_offset, origin, reg);
+    if (image->is_backed_by_buffer_view()) {
+        auto cmd = new cvk_command_copy_buffer(
+            command_queue, CL_COMMAND_COPY_BUFFER_TO_IMAGE, buffer,
+            static_cast<cvk_buffer*>(image->buffer()), src_offset,
+            dst_origin[0] * image->element_size(),
+            region[0] * image->element_size());

-    return command_queue->enqueue_command_with_deps(
-        cmd.release(), num_events_in_wait_list, event_wait_list, event);
+        return command_queue->enqueue_command_with_deps(
+            cmd, num_events_in_wait_list, event_wait_list, event);
+    } else {
+        auto cmd = std::make_unique<cvk_command_buffer_image_copy>(
+            CL_COMMAND_COPY_BUFFER_TO_IMAGE, command_queue, buffer, image,
+            src_offset, origin, reg);
+
+        return command_queue->enqueue_command_with_deps(
+            cmd.release(), num_events_in_wait_list, event_wait_list, event);
+    }
 }

 void* cvk_enqueue_map_image(cl_command_queue cq, cl_mem img,
@@ -5499,10 +5611,20 @@ void* CLVK_API_CALL clEnqueueMapImage(
     auto command_queue = icd_downcast(cq);

     cl_int err;
-    auto ret = cvk_enqueue_map_image(command_queue, image, blocking_map,
-                                     map_flags, origin, region, image_row_pitch,
-                                     image_slice_pitch, num_events_in_wait_list,
-                                     event_wait_list, event, &err);
+    void* ret;
+    auto img = static_cast<cvk_image*>(image);
+    if (img->is_backed_by_buffer_view()) {
+        ret = cvk_enqueue_map_buffer(
+            command_queue, static_cast<cvk_buffer*>(img->buffer()),
+            blocking_map, origin[0] * img->element_size(),
+            region[0] * img->element_size(), map_flags, num_events_in_wait_list,
+            event_wait_list, event, &err, CL_COMMAND_MAP_IMAGE);
+    } else {
+        ret = cvk_enqueue_map_image(command_queue, image, blocking_map,
+                                    map_flags, origin, region, image_row_pitch,
+                                    image_slice_pitch, num_events_in_wait_list,
+                                    event_wait_list, event, &err);
+    }

     if (errcode_ret != nullptr) {
         *errcode_ret = err;
diff --git a/src/device.cpp b/src/device.cpp
index 7af999c5..03b528b7 100644
--- a/src/device.cpp
+++ b/src/device.cpp
@@ -1042,6 +1042,7 @@ bool cvk_device::supports_capability(spv::Capability capability) const {
     case spv::CapabilityImage1D:
     case spv::CapabilityImageQuery:
     case spv::CapabilityImageBuffer:
+    case spv::CapabilitySampledBuffer:
         return true;
     // Optional capabilities:
     case spv::CapabilityFloat16:
diff --git a/src/device.hpp b/src/device.hpp
index d988b3ec..91ed7bc3 100644
--- a/src/device.hpp
+++ b/src/device.hpp
@@ -248,6 +248,11 @@ struct cvk_device : public _cl_device_id,
         return maxAllocSz;
     }

+    size_t image_max_buffer_size() const {
+        return std::min((uint64_t)vulkan_limits().maxTexelBufferElements,
+                        max_mem_alloc_size());
+    }
+
     cl_uint mem_base_addr_align() const {
         // The OpenCL spec requires at least 1024 bits (long16's alignment)
         uint32_t required_by_vulkan_impl =
diff --git a/src/kernel.cpp b/src/kernel.cpp
index cf0ba472..09c5fceb 100644
--- a/src/kernel.cpp
+++ b/src/kernel.cpp
@@ -119,7 +119,9 @@ cl_int cvk_kernel::set_arg(cl_uint index, size_t size, const void* value) {
     // if the argument is an image, we need to set its metadata
     // (channel_order/channel_data_type).
     if (arg.kind == kernel_argument_kind::sampled_image ||
-        arg.kind == kernel_argument_kind::storage_image) {
+        arg.kind == kernel_argument_kind::storage_image ||
+        arg.kind == kernel_argument_kind::storage_texel_buffer ||
+        arg.kind == kernel_argument_kind::uniform_texel_buffer) {
         set_image_metadata(index, value);
     }

@@ -155,9 +157,11 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() {
     std::vector<VkWriteDescriptorSet> descriptor_writes;
     std::vector<VkDescriptorBufferInfo> buffer_info;
     std::vector<VkDescriptorImageInfo> image_info;
+    std::vector<VkBufferView> buffer_views;
     descriptor_writes.reserve(max_descriptor_writes);
     buffer_info.reserve(max_descriptor_writes);
     image_info.reserve(max_descriptor_writes);
+    buffer_views.reserve(max_descriptor_writes);

     // Setup module-scope variables
     if (program->module_constant_data_buffer() != nullptr &&
@@ -324,6 +328,36 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() {
            descriptor_writes.push_back(writeDescriptorSet);
            break;
        }
+        case kernel_argument_kind::storage_texel_buffer:
+        case kernel_argument_kind::uniform_texel_buffer: {
+            auto image = static_cast<cvk_image*>(get_arg_value(arg));
+            bool uniform =
+                arg.kind == kernel_argument_kind::uniform_texel_buffer;
+            auto view = image->vulkan_buffer_view();
+            buffer_views.push_back(view);
+
+            cvk_debug_fn("buffer view %p @ set = %u, binding = %u", view,
+                         arg.descriptorSet, arg.binding);
+
+            VkDescriptorType dtype =
+                uniform ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
+                        : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+
+            VkWriteDescriptorSet writeDescriptorSet = {
+                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                nullptr,
+                ds[arg.descriptorSet],
+                arg.binding,          // dstBinding
+                0,                    // dstArrayElement
+                1,                    // descriptorCount
+                dtype,
+                nullptr,              // pImageInfo
+                nullptr,              // pBufferInfo
+                &buffer_views.back(), // pTexelBufferView
+            };
+            descriptor_writes.push_back(writeDescriptorSet);
+            break;
+        }
        case kernel_argument_kind::pod: // skip POD arguments
        case kernel_argument_kind::pod_ubo:
        case kernel_argument_kind::pod_pushconstant:
diff --git a/src/memory.cpp b/src/memory.cpp
index 5688e45b..016af67a 100644
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -243,22 +243,33 @@ bool cvk_sampler::init() {
 VkFormatFeatureFlags
 cvk_image::required_format_feature_flags_for(cl_mem_object_type type,
                                              cl_mem_flags flags) {
-    UNUSED(type); // TODO will be required for 1D buffer images
-    // All images require TRANSFER_SRC, TRANSFER_DST
+    // 1Dbuffer requires
+    //     RW / RaW: STORAGE_TEXEL_BUFFER
+    //     RO: UNIFORM_TEXEL_BUFFER
+    // All other images require TRANSFER_SRC, TRANSFER_DST
     // read-only: SAMPLED_IMAGE, SAMPLED_IMAGE_FILTER_LINEAR
     // write-only: STORAGE_IMAGE
     // read-write: STORAGE_IMAGE, SAMPLED_IMAGE, SAMPLED_IMAGE_FILTER_LINEAR
     // read-and-write: STORAGE_IMAGE
     VkFormatFeatureFlags format_feature_flags = 0;
-    format_feature_flags =
-        VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
-
+    if (type != CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+        format_feature_flags = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+                               VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+    }
     VkFormatFeatureFlags format_feature_flags_RO;
-    format_feature_flags_RO = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
-                              VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
+    if (type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+        format_feature_flags_RO = VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
+    } else {
+        format_feature_flags_RO =
+            VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+            VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+    }
     VkFormatFeatureFlags format_feature_flags_WO;
-    format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+    if (type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+        format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+    } else {
+        format_feature_flags_WO = VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+    }

     if (flags & (CL_MEM_KERNEL_READ_AND_WRITE | CL_MEM_WRITE_ONLY)) {
         format_feature_flags |= format_feature_flags_WO;
@@ -292,7 +303,7 @@ cl_image_format_to_vulkan_format(const cl_image_format& clfmt,
                                  VkComponentMapping* components_sampled,
                                  VkComponentMapping* components_storage);

-bool cvk_image::init() {
+bool cvk_image::init_vulkan_image() {
     // Translate image type and size
     VkImageType image_type;
     VkImageViewType view_type;
@@ -320,7 +331,6 @@ bool cvk_image::init() {
     size_t host_ptr_size = 0;

     switch (m_desc.image_type) {
-    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
     case CL_MEM_OBJECT_IMAGE1D:
         image_type = VK_IMAGE_TYPE_1D;
         view_type = VK_IMAGE_VIEW_TYPE_1D;
@@ -397,29 +407,24 @@ bool cvk_image::init() {
         return false;
     }

-    if (m_desc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
-        auto buffer = static_cast<cvk_buffer*>(m_desc.buffer);
-        m_memory = buffer->memory();
-        buffer->retain();
-    } else {
-        // Select memory type
-        cvk_device::allocation_parameters params =
-            device->select_memory_for(m_image);
-        if (params.memory_type_index == VK_MAX_MEMORY_TYPES) {
-            cvk_error_fn("Could not get memory type!");
-            return false;
-        }
+    CVK_ASSERT(m_desc.image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER);
+    // Select memory type
+    cvk_device::allocation_parameters params =
+        device->select_memory_for(m_image);
+    if (params.memory_type_index == VK_MAX_MEMORY_TYPES) {
+        cvk_error_fn("Could not get memory type!");
+        return false;
+    }

-        // Allocate memory
-        m_memory = std::make_unique<cvk_memory_allocation>(
-            vkdev, params.size, params.memory_type_index);
+    // Allocate memory
+    m_memory = std::make_unique<cvk_memory_allocation>(
+        vkdev, params.size, params.memory_type_index);

-        res = m_memory->allocate(device->uses_physical_addressing());
+    res = m_memory->allocate(device->uses_physical_addressing());

-        if (res != VK_SUCCESS) {
-            cvk_error_fn("Could not allocate memory!");
-            return false;
-        }
+    if (res != VK_SUCCESS) {
+        cvk_error_fn("Could not allocate memory!");
+        return false;
     }

     // Bind the image to memory
@@ -485,6 +490,56 @@ bool cvk_image::init() {
     return true;
 }

+bool cvk_image::init_vulkan_texel_buffer() {
+    VkResult res;
+
+    auto device = m_context->device();
+    auto vkdev = device->vulkan_device();
+
+    VkFormat format;
+    VkComponentMapping components_sampled, components_storage;
+
+    auto success = cl_image_format_to_vulkan_format(
+        m_format, &format, &components_sampled, &components_storage);
+    if (!success) {
+        return false;
+    }
+
+    CVK_ASSERT(buffer());
+    CVK_ASSERT(buffer()->is_buffer_type());
+
+    auto vkbuf = static_cast<cvk_buffer*>(buffer())->vulkan_buffer();
+    auto offset = static_cast<cvk_buffer*>(buffer())->vulkan_buffer_offset();
+
+    VkBufferViewCreateInfo createInfo = {
+        VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+        nullptr,
+        0,             // flags
+        vkbuf,         // buffer
+        format,        // format
+        offset,        // offset
+        VK_WHOLE_SIZE  // range
+    };
+
+    res = vkCreateBufferView(vkdev, &createInfo, nullptr, &m_buffer_view);
+    if (res != VK_SUCCESS) {
+        cvk_error_fn("Could not create buffer view");
+        return false;
+    }
+
+    buffer()->retain();
+
+    return true;
+}
+
+bool cvk_image::init() {
+    if (is_backed_by_buffer_view()) {
+        return init_vulkan_texel_buffer();
+    } else {
+        return init_vulkan_image();
+    }
+}
+
 void cvk_image::prepare_fill_pattern(const void* input_pattern,
                                      fill_pattern_array& pattern,
                                      size_t* size_ret) const {
diff --git a/src/memory.hpp b/src/memory.hpp
index 63817558..987a1e34 100644
--- a/src/memory.hpp
+++ b/src/memory.hpp
@@ -314,10 +314,13 @@ struct cvk_buffer : public cvk_mem {
     cvk_mem* create_subbuffer(cl_mem_flags, size_t origin, size_t size);

     VkBufferUsageFlags prepare_usage_flags() {
-        VkBufferUsageFlags usage_flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-                                         VK_BUFFER_USAGE_TRANSFER_DST_BIT |
-                                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
-                                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+        VkBufferUsageFlags usage_flags =
+            VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+            VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+            VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+            VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+            VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
         if (m_context->device()->uses_physical_addressing()) {
             usage_flags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
         }
@@ -485,10 +488,16 @@ struct cvk_image : public cvk_mem {
                   /* FIXME parent_offset */ 0, std::move(properties),
                   desc->image_type),
           m_desc(*desc), m_format(*format), m_image(VK_NULL_HANDLE),
-          m_sampled_view(VK_NULL_HANDLE), m_storage_view(VK_NULL_HANDLE) {
+          m_sampled_view(VK_NULL_HANDLE), m_storage_view(VK_NULL_HANDLE),
+          m_buffer_view(VK_NULL_HANDLE) {
         // All images require asynchronous initialiation for the initial
-        // layout transition (and copy/use host ptr init)
-        m_init_tracker.set_state(cvk_mem_init_state::required);
+        // layout transition (and copy/use host ptr init) apart from
+        // those backed by a texel buffer
+        if (is_backed_by_buffer_view()) {
+            m_init_tracker.set_state(cvk_mem_init_state::completed);
+        } else {
+            m_init_tracker.set_state(cvk_mem_init_state::required);
+        }
     }

     ~cvk_image() {
@@ -502,6 +511,9 @@ struct cvk_image : public cvk_mem {
         if (m_storage_view != VK_NULL_HANDLE) {
             vkDestroyImageView(vkdev, m_storage_view, nullptr);
         }
+        if (m_buffer_view != VK_NULL_HANDLE) {
+            vkDestroyBufferView(vkdev, m_buffer_view, nullptr);
+        }
         if (buffer() != nullptr) {
             buffer()->release();
         }
@@ -530,9 +542,26 @@ struct cvk_image : public cvk_mem {
                              const cl_image_format* format, void* host_ptr,
                              std::vector<cl_mem_properties>&& properties);

-    VkImage vulkan_image() const { return m_image; }
-    VkImageView vulkan_sampled_view() const { return m_sampled_view; }
-    VkImageView vulkan_storage_view() const { return m_storage_view; }
+    bool is_backed_by_buffer_view() const {
+        return type() == CL_MEM_OBJECT_IMAGE1D_BUFFER;
+    }
+
+    VkImage vulkan_image() const {
+        CVK_ASSERT(!is_backed_by_buffer_view());
+        return m_image;
+    }
+    VkImageView vulkan_sampled_view() const {
+        CVK_ASSERT(!is_backed_by_buffer_view());
+        return m_sampled_view;
+    }
+    VkImageView vulkan_storage_view() const {
+        CVK_ASSERT(!is_backed_by_buffer_view());
+        return m_storage_view;
+    }
+    VkBufferView vulkan_buffer_view() const {
+        CVK_ASSERT(is_backed_by_buffer_view());
+        return m_buffer_view;
+    }
     const cl_image_format& format() const { return m_format; }
     size_t element_size() const {
         switch (m_format.image_channel_data_type) {
@@ -693,6 +722,8 @@ struct cvk_image : public cvk_mem {
                               size_t* size_ret) const;

 private:
+    bool init_vulkan_image();
+    bool init_vulkan_texel_buffer();
     bool init();

     size_t num_channels() const {
@@ -746,6 +777,7 @@ struct cvk_image : public cvk_mem {
     VkImage m_image;
     VkImageView m_sampled_view;
     VkImageView m_storage_view;
+    VkBufferView m_buffer_view;
     std::unordered_map> m_mappings;
     std::mutex m_mappings_lock;
     std::unique_ptr m_init_data;
diff --git a/src/program.cpp b/src/program.cpp
index b0390fab..9e538dcd 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -117,6 +117,10 @@ spv_result_t parse_reflection(void* user_data,
             return kernel_argument_kind::sampled_image;
         case NonSemanticClspvReflectionArgumentStorageImage:
             return kernel_argument_kind::storage_image;
+        case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
+            return kernel_argument_kind::storage_texel_buffer;
+        case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
+            return kernel_argument_kind::uniform_texel_buffer;
         case NonSemanticClspvReflectionArgumentSampler:
             return kernel_argument_kind::sampler;
         case NonSemanticClspvReflectionArgumentWorkgroup:
@@ -234,6 +238,8 @@ spv_result_t parse_reflection(void* user_data,
     case NonSemanticClspvReflectionArgumentUniform:
     case NonSemanticClspvReflectionArgumentSampledImage:
     case NonSemanticClspvReflectionArgumentStorageImage:
+    case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
+    case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
     case NonSemanticClspvReflectionArgumentSampler: {
         // These arguments have descriptor set, binding and an optional
         // arg info.
@@ -1743,6 +1749,12 @@ bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_arguments(
         case kernel_argument_kind::storage_image:
             dt = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
             break;
+        case kernel_argument_kind::storage_texel_buffer:
+            dt = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+            break;
+        case kernel_argument_kind::uniform_texel_buffer:
+            dt = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+            break;
         case kernel_argument_kind::sampler:
             dt = VK_DESCRIPTOR_TYPE_SAMPLER;
             break;
diff --git a/src/program.hpp b/src/program.hpp
index 8df31c7e..7a65bc35 100644
--- a/src/program.hpp
+++ b/src/program.hpp
@@ -47,6 +47,8 @@ enum class kernel_argument_kind
     pointer_pushconstant,
     sampled_image,
     storage_image,
+    storage_texel_buffer,
+    uniform_texel_buffer,
     sampler,
     local,
     unused,
@@ -101,7 +103,9 @@ struct kernel_argument {
         return (kind == kernel_argument_kind::buffer) ||
               (kind == kernel_argument_kind::buffer_ubo) ||
               (kind == kernel_argument_kind::sampled_image) ||
-              (kind == kernel_argument_kind::storage_image);
+              (kind == kernel_argument_kind::storage_image) ||
+              (kind == kernel_argument_kind::storage_texel_buffer) ||
+              (kind == kernel_argument_kind::uniform_texel_buffer);
     }
     bool is_unused() const { return kind == kernel_argument_kind::unused; }
diff --git a/src/queue.cpp b/src/queue.cpp
index 18ca429b..536347fc 100644
--- a/src/queue.cpp
+++ b/src/queue.cpp
@@ -1192,9 +1192,11 @@ cl_int cvk_command_buffer_host_copy::do_action() {
     bool success = false;

     switch (m_type) {
+    case CL_COMMAND_WRITE_IMAGE:
     case CL_COMMAND_WRITE_BUFFER:
         success = m_buffer->copy_from(m_ptr, m_offset, m_size);
         break;
+    case CL_COMMAND_READ_IMAGE:
     case CL_COMMAND_READ_BUFFER:
         success = m_buffer->copy_to(m_ptr, m_offset, m_size);
         break;
@@ -1669,7 +1671,7 @@ cl_int cvk_command_buffer_image_copy::build_batchable_inner(
     VkBufferImageCopy region =
         prepare_buffer_image_copy(m_image, m_offset, m_origin, m_region);

-    switch (type()) {
+    switch (m_copy_type) {
     case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
     case CL_COMMAND_MAP_IMAGE:
         build_inner_image_to_buffer(cmdbuf, region);
diff --git a/src/queue.hpp b/src/queue.hpp
index 4acd0e5e..329fed32 100644
--- a/src/queue.hpp
+++ b/src/queue.hpp
@@ -627,9 +627,8 @@ struct cvk_command_fill_buffer final : public cvk_command_buffer_base_region {

     cvk_command_fill_buffer(cvk_command_queue* q, cvk_buffer* buffer,
                             size_t offset, size_t size, const void* pattern,
-                            size_t pattern_size)
-        : cvk_command_buffer_base_region(q, CL_COMMAND_FILL_BUFFER, buffer,
-                                         offset, size),
+                            size_t pattern_size, cl_command_type type)
+        : cvk_command_buffer_base_region(q, type, buffer, offset, size),
          m_pattern_size(pattern_size) {
        memcpy(m_pattern.data(), pattern, pattern_size);
    }
@@ -883,9 +882,9 @@ struct cvk_command_batch : public cvk_command {

 struct cvk_command_map_buffer final : public cvk_command_buffer_base_region {
     cvk_command_map_buffer(cvk_command_queue* queue, cvk_buffer* buffer,
-                           size_t offset, size_t size, cl_map_flags flags)
-        : cvk_command_buffer_base_region(queue, CL_COMMAND_MAP_BUFFER, buffer,
-                                         offset, size),
+                           size_t offset, size_t size, cl_map_flags flags,
+                           cl_command_type type)
+        : cvk_command_buffer_base_region(queue, type, buffer, offset, size),
           m_flags(flags), m_mapping_needs_releasing_on_destruction(false) {}
     ~cvk_command_map_buffer() {
         if (m_mapping_needs_releasing_on_destruction) {
@@ -937,7 +936,18 @@ struct cvk_command_buffer_image_copy final : public cvk_command_batchable {
                                   const std::array<size_t, 3>& origin,
                                   const std::array<size_t, 3>& region)
         : cvk_command_batchable(type, queue), m_buffer(buffer), m_image(image),
-          m_offset(offset), m_origin(origin), m_region(region) {}
+          m_offset(offset), m_origin(origin), m_region(region),
+          m_copy_type(type) {}
+
+    cvk_command_buffer_image_copy(cl_command_type type,
+                                  cl_command_type copy_type,
+                                  cvk_command_queue* queue, cvk_buffer* buffer,
+                                  cvk_image* image, size_t offset,
+                                  const std::array<size_t, 3>& origin,
+                                  const std::array<size_t, 3>& region)
+        : cvk_command_batchable(type, queue), m_buffer(buffer), m_image(image),
+          m_offset(offset), m_origin(origin), m_region(region),
+          m_copy_type(copy_type) {}

     CHECK_RETURN cl_int
     build_batchable_inner(cvk_command_buffer& cmdbuf) override final;
@@ -957,6 +967,7 @@ struct cvk_command_buffer_image_copy final : public cvk_command_batchable {
     size_t m_offset;
     std::array<size_t, 3> m_origin;
     std::array<size_t, 3> m_region;
+    cl_command_type m_copy_type;
 };

 struct cvk_command_combine final : public cvk_command {
@@ -1141,7 +1152,9 @@ struct cvk_command_image_init final : public cvk_command_batchable {

     cvk_command_image_init(cvk_command_queue* queue, cvk_image* image)
         : cvk_command_batchable(CLVK_COMMAND_IMAGE_INIT, queue),
-          m_image(image) {}
+          m_image(image) {
+        CVK_ASSERT(!m_image->is_backed_by_buffer_view());
+    }
     bool is_data_movement() const override { return true; }
     CHECK_RETURN cl_int
     build_batchable_inner(cvk_command_buffer& cmdbuf) override final;
diff --git a/tests/api/images.cpp b/tests/api/images.cpp
index 1c1427f4..4643f456 100644
--- a/tests/api/images.cpp
+++ b/tests/api/images.cpp
@@ -403,17 +403,16 @@ TEST_F(WithCommandQueue, ImageWriteOffset) {
 }

 TEST_F(WithContext, Image1DBuffer) {
-    const size_t IMAGE_HEIGHT = 128;
     const size_t IMAGE_WIDTH = 128;

-    auto buffer_size = IMAGE_HEIGHT * IMAGE_WIDTH * sizeof(cl_float4);
+    auto buffer_size = IMAGE_WIDTH * sizeof(cl_float4);
     auto buffer = CreateBuffer(CL_MEM_READ_WRITE, buffer_size, nullptr);

     cl_image_format format = {CL_RGBA, CL_FLOAT};
     cl_image_desc desc = {
         CL_MEM_OBJECT_IMAGE1D_BUFFER, // image_type
         IMAGE_WIDTH,                  // image_width
-        IMAGE_HEIGHT,                 // image_height
+        1,                            // image_height
         1,                            // image_depth
         1,                            // image_array_size
         0,                            // image_row_pitch
@@ -594,3 +593,63 @@ kernel void test(global uint* dst, uint magic, image2d_t read_only image, uint o
                 (dst[2] == (offset + magic)));
     }
 }
+
+TEST_F(WithCommandQueue, 1DBufferImageFromSubBuffer) {
+    const size_t IMAGE_WIDTH = 128;
+    const unsigned long nb_prefix_elements = 16;
+    const unsigned long nb_suffix_elements = 16;
+
+    auto subbuffer_size = IMAGE_WIDTH * sizeof(cl_float4);
+    auto buffer_size =
+        subbuffer_size +
+        (nb_prefix_elements + nb_suffix_elements) * sizeof(cl_float4);
+    auto buffer = CreateBuffer(CL_MEM_READ_WRITE, buffer_size, nullptr);
+    auto subbuffer = CreateSubBuffer(
+        buffer, 0, nb_prefix_elements * sizeof(cl_float4), subbuffer_size);
+
+    cl_image_format format = {CL_RGBA, CL_FLOAT};
+    cl_image_desc desc = {
+        CL_MEM_OBJECT_IMAGE1D_BUFFER, // image_type
+        IMAGE_WIDTH,                  // image_width
+        1,                            // image_height
+        1,                            // image_depth
+        1,                            // image_array_size
+        0,                            // image_row_pitch
+        0,                            // image_slice_pitch
+        0,                            // num_mip_levels
+        0,                            // num_samples
+        subbuffer,                    // buffer
+    };
+
+    auto image = CreateImage(CL_MEM_READ_WRITE, &format, &desc);
+
+    const char* source = R"(
+kernel void test(image1d_buffer_t write_only image)
+{
+    int gid = get_global_id(0);
+    write_imagef(image, gid, (float4)(0.0, 0.0, 0.0, 0.0));
+}
+)";
+    auto kernel = CreateKernel(source, "test");
+    SetKernelArg(kernel, 0, image);
+    cl_float pattern = -42.0;
+    EnqueueFillBuffer(buffer, &pattern, sizeof(pattern), buffer_size);
+    EnqueueNDRangeKernel(kernel, 1, nullptr, &IMAGE_WIDTH, nullptr);
+
+    cl_float4 output[IMAGE_WIDTH + nb_prefix_elements + nb_suffix_elements];
+    EnqueueReadBuffer(buffer, CL_TRUE, 0, buffer_size, output);
+    for (unsigned i = 0; i < nb_prefix_elements; i++) {
+        EXPECT_TRUE(output[i].s0 == pattern && output[i].s1 == pattern &&
+                    output[i].s2 == pattern && output[i].s3 == pattern);
+    }
+    for (unsigned i = nb_prefix_elements; i < IMAGE_WIDTH + nb_prefix_elements;
+         i++) {
+        EXPECT_TRUE(output[i].s0 == 0.0 && output[i].s1 == 0.0 &&
+                    output[i].s2 == 0.0 && output[i].s3 == 0.0);
+    }
+    for (unsigned i = IMAGE_WIDTH + nb_prefix_elements;
+         i < IMAGE_WIDTH + nb_prefix_elements + nb_suffix_elements; i++) {
+        EXPECT_TRUE(output[i].s0 == pattern && output[i].s1 == pattern &&
+                    output[i].s2 == pattern && output[i].s3 == pattern);
+    }
+}
diff --git a/tests/api/testcl.hpp b/tests/api/testcl.hpp
index 51040a2f..7dd1a73f 100644
--- a/tests/api/testcl.hpp
+++ b/tests/api/testcl.hpp
@@ -405,6 +405,20 @@ class WithContext : public ::testing::Test {
         return mem;
     }

+    holder<cl_mem> CreateSubBuffer(cl_mem buffer, cl_mem_flags flags,
+                                   const size_t region_origin,
+                                   const size_t region_size) {
+        cl_int err;
+        cl_buffer_region buffer_region = {
+            .origin = region_origin,
+            .size = region_size,
+        };
+        auto mem = clCreateSubBuffer(
+            buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &buffer_region, &err);
+        EXPECT_CL_SUCCESS(err);
+        return mem;
+    }
+
     holder<cl_sampler> CreateSampler(cl_bool normalized_coords,
                                      cl_addressing_mode addressing_mode,
                                      cl_filter_mode filter_mode) {
@@ -656,6 +670,20 @@ class WithCommandQueue : public WithContext {
                             nullptr, nullptr);
     }

+    void EnqueueFillBuffer(cl_mem buffer, const void* pattern,
+                           size_t pattern_size, size_t offset, size_t size) {
+        auto err = clEnqueueFillBuffer(m_queue, buffer, pattern, pattern_size,
+                                       offset, size, 0, nullptr, nullptr);
+        ASSERT_CL_SUCCESS(err);
+    }
+
+    void EnqueueFillBuffer(cl_mem buffer, const void* pattern,
+                           size_t pattern_size, size_t size) {
+        auto err = clEnqueueFillBuffer(m_queue, buffer, pattern, pattern_size,
+                                       0, size, 0, nullptr, nullptr);
+        ASSERT_CL_SUCCESS(err);
+    }
+
     void EnqueueReadImage(cl_mem image, cl_bool blocking_read,
                           const size_t* origin, const size_t* region,
                           size_t row_pitch, size_t slice_pitch, void* ptr,