From 761d8c1b43873de1643165e47498655c5f12bc20 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Thu, 18 Jan 2024 10:08:33 +0000 Subject: [PATCH 1/8] [SYCL][Bindless] Add mipmap interop + slight redesign + bug fix Mipmap interop: - Remove "interop" image type as it is redundant for image creation - Add a vulkan mipmap interop test Slight redesign: - Simplify external resources Modify the bindless spec to reflect these changes Fix Vulkan interop tests to prevent memory leak issue by freeing mapped memory --- .../sycl_ext_oneapi_bindless_images.asciidoc | 141 +++--- .../ext/oneapi/bindless_images_descriptor.hpp | 7 +- .../ext/oneapi/bindless_images_interop.hpp | 47 +- sycl/plugins/unified_runtime/CMakeLists.txt | 16 +- sycl/source/detail/bindless_images.cpp | 64 ++- .../vulkan_interop/mipmaps.cpp | 448 ++++++++++++++++++ .../vulkan_interop/sampled_images.cpp | 78 +-- .../vulkan_interop/unsampled_images.cpp | 59 +-- .../vulkan_interop/vulkan_common.hpp | 55 ++- sycl/test/abi/sycl_symbols_linux.dump | 4 + sycl/test/abi/sycl_symbols_windows.dump | 4 + 11 files changed, 726 insertions(+), 197 deletions(-) create mode 100644 sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 7616890d25674..a00fa7c5a21f2 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -194,7 +194,6 @@ enum class image_channel_type : /* unspecified */ { enum class image_type : /* unspecified */ { standard, mipmap, - interop, }; struct image_descriptor { @@ -229,7 +228,7 @@ struct image_descriptor { The image descriptor represents the image dimensions, channel type, and channel order. An `image_type` member is also present to allow for implementation of -mipmapped and interop images. +mipmapped images. The `image_descriptor` shall be default constructible and follow by-value semantics. @@ -1236,40 +1235,58 @@ memory or semaphore objects. CUDA for example only supports importation of external memory and semaphores, but provides no support for their exportation. ==== -=== Importing external memory objects +=== External Resource types -In order to import a memory object, an external API must provide an appropriate -handle to that memory. The exact structure and type of this handle can depend on -the external API, and the operating system the application is running on. - -In order to facilitate a number of different external memory handle types, we -propose the following structures. +In order to facilitate the importing of a number of different external memory +and external semaphore handle types, we propose the following resource +structures. [NOTE] ==== -We only show two examples of external memory handle types here, but the -`external_mem_descriptor` struct could be templated by any number of handle +We only show three examples of external resource handle types here, but the +`external_mem_descriptor` and `external_semaphore_descriptor` structs, as +defined in <> and +<>, could be templated by any number of handle types, provided that the SYCL implementation provides support for them. ==== ```cpp namespace sycl::ext::oneapi::experimental { -// POSIX file descriptor memory handle type -struct external_mem_fd { +// POSIX file descriptor handle type +struct resource_fd { int file_descriptor; }; -// Windows NT memory handle type -struct external_mem_win32 { +// Windows NT handle type +struct resource_win32_handle { void *handle; +}; + +// Windows NT name type +struct resource_win32_name { const void *name; }; -// Descriptor templated on specific external memory handle type -template -struct external_mem_handle_type { - external_mem_handle_type external_handle; +} +``` + +=== Importing external memory objects [[importing_external_memory_objects]] + +In order to import a memory object, an external API must provide an appropriate +handle to that memory. The exact structure and type of this handle can depend on +the external API, and the operating system the application is running on. + +External memory import is facilitated through the following proposed descriptor +struct. + +```cpp +namespace sycl::ext::oneapi::experimental { + +// Descriptor templated on specific resource type +template +struct external_mem_descriptor { + ResourceType external_resource; size_t size_in_bytes; }; @@ -1277,12 +1294,12 @@ struct external_mem_handle_type { ``` The user should create an `external_mem_descriptor` templated on the appropriate -handle type for their purposes, e.g. `external_mem_fd` to describe a POSIX file -descriptor resource on Linux systems, or an `external_mem_win32` for Windows NT -resource handles. +handle type, `ResourceType`, for their purposes, e.g. `resource_fd` to describe +a POSIX file descriptor resource on Linux systems, or a `resource_win32_handle` +for Windows NT resource handles. Once the user populates the `external_mem_descriptor` with the appropriate -`external_mem_handle_type` values, and the size of the external memory in bytes, +`ResourceType` values, and the size of the external memory in bytes, they can then import that memory into SYCL through `import_external_memory`. ```cpp @@ -1293,15 +1310,15 @@ struct interop_mem_handle { raw_handle_type raw_handle; }; -template +template interop_mem_handle import_external_memory( - external_mem_descriptor externalMemDescriptor, + external_mem_descriptor externalMemDescriptor, const sycl::device &syclDevice, const sycl::context &syclContext); -template +template interop_mem_handle import_external_memory( - external_mem_descriptor externalMemDescriptor, + external_mem_descriptor externalMemDescriptor, const sycl::queue &syclQueue); image_mem_handle map_external_image_memory( @@ -1325,8 +1342,12 @@ When calling `create_image` with an `image_mem_handle` mapped from an external memory object, the user must ensure that the image descriptor they pass to `create_image` has members that match or map to those of the external API. A mismatch between any of the `width`, `height`, `depth`, `image_channel_type`, -or `image_channel_order` members will result in undefined behavior. The -`image_type` member must be set to `image_type::interop`. +or `image_channel_order` members will result in undefined behavior. + +Additionally, the `image_type` describing the image must match to the image of +the external API. The current supported importable image types are `standard` +and `mipmap`. Attempting to import other image types will result in undefined +behaviour. Once a user has finished operating on imported memory, they must ensure that they destroy the imported memory handle through `release_external_memory`. @@ -1348,7 +1369,7 @@ void release_external_memory(interop_mem_handle interopMem, Destroying or freeing any imported memory through `image_mem_free` or `sycl::free` will result in undefined behavior. -=== Importing external semaphores +=== Importing external semaphores [[importing_external_semaphores]] In addition to proposing importation of external memory resources, we also propose importation of synchronization primitives. Just like the sharing of @@ -1358,47 +1379,29 @@ memory resources handles can take different forms of structure and type depending on the API and operating system, so do external semaphore resource handles. -In order to facilitate a number of different external semaphore handle types, we -propose the following structures. - -[NOTE] -==== -We only show two examples of external semaphore resource handle types here, but -the `external_semaphore_descriptor` struct could be templated by any number of -handle types, provided that the SYCL implementation provides support for them. -==== +External semaphore import is facilitated through the following proposed +descriptor struct. ```cpp namespace sycl::ext::oneapi::experimental { -// POSIX file descriptor semaphore handle -struct external_semaphore_fd { - int file_descriptor; -}; - -// Windows NT semaphore handle -struct external_semaphore_win32 { - void *handle; - const void *name; -}; - -// Descriptor templated on specific external semaphore handle type -template +// Descriptor templated on specific resource type +template struct external_semaphore_descriptor { - external_semaphore_handle_type external_handle; + ResourceType external_resource; }; } ``` The user should create an `external_semaphore_descriptor` templated on the -appropriate handle type for their purposes, e.g. `external_semaphore_fd` to -describe a POSIX file descriptor resource on Linux systems, or an -`external_mem_win32` for Windows NT resource handles. +appropriate handle type, `ResourceType`, for their purposes, e.g. `resource_fd` +to describe a POSIX file descriptor resource on Linux systems, or a +`resource_win32_handle` for Windows NT resource handles. Once the user populates the `external_semaphore_descriptor` with the appropriate -`external_semaphore_handle_type` values, they can then import that semaphore -into SYCL through `import_external_semaphore`. +`ResourceType` values, they can then import that semaphore into SYCL through +`import_external_semaphore`. ```cpp namespace sycl::ext::oneapi::experimental { @@ -1408,17 +1411,17 @@ struct interop_semaphore_handle { raw_handle_type raw_handle; }; -template +template interop_semaphore_handle import_external_semaphore( - external_semaphore_descriptor + external_semaphore_descriptor externalSemaphoreDescriptor, const sycl::device &syclDevice, const sycl::context &syclContext); } -template +template interop_semaphore_handle import_external_semaphore( - external_semaphore_descriptor + external_semaphore_descriptor externalSemaphoreDescriptor, const sycl::queue &syclQueue); } @@ -1786,10 +1789,8 @@ sycl::ext::oneapi::experimental::image_channel_type channel_type = /* we assume sycl::image_channel_type::unsigned_int32 */; // Image descriptor - mapped to external API image layout -// with `image_type::interop` sycl::ext::oneapi::experimental::image_descriptor desc( - {width, height}, channel_order, channel_type, - sycl::ext::oneapi::experimental::image_type::interop); + {width, height}, channel_order, channel_type); size_t img_size_in_bytes = width * height * sizeof(uint32_t); @@ -1798,12 +1799,12 @@ int external_output_image_file_descriptor = /* passed from external API */ // Extension: populate external memory descriptors sycl::ext::oneapi::experimental::external_mem_descriptor< - sycl::ext::oneapi::experimental::external_mem_fd> + sycl::ext::oneapi::experimental::resource_fd> input_ext_mem_desc{external_input_image_file_descriptor, img_size_in_bytes}; sycl::ext::oneapi::experimental::external_mem_descriptor< - sycl::ext::oneapi::experimental::external_mem_fd> + sycl::ext::oneapi::experimental::resource_fd> output_ext_mem_desc{external_output_image_file_descriptor, img_size_in_bytes}; @@ -1818,11 +1819,11 @@ int done_semaphore_file_descriptor = /* passed from external API */; // Extension: populate external semaphore descriptor. // We assume POSIX file descriptor resource types sycl::ext::oneapi::experimental::external_semaphore_descriptor< - sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl::ext::oneapi::experimental::resource_fd> wait_external_semaphore_desc{wait_semaphore_file_descriptor}; sycl::ext::oneapi::experimental::external_semaphore_descriptor< - sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl::ext::oneapi::experimental::resource_fd> done_external_semaphore_desc{done_semaphore_file_descriptor}; try { @@ -2084,4 +2085,6 @@ These features still need to be handled: |5.3|2024-02-16| - Replace `read_image` and `read_mipmap` APIs in favor of more descriptive naming, with `fetch_image`, `sample_image`, and `sample_mipmap`. +|5.4|2024-02-26| - Update interop with mipmap interop and slight redesign + - `interop` removed from `image_type` |====================== diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp index dd8751992bd7c..49c1d7f1af642 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp @@ -24,10 +24,9 @@ namespace ext::oneapi::experimental { /// image type enum enum class image_type : unsigned int { standard = 0, - interop = 1, - mipmap = 2, - cubemap = 3, /* Not implemented */ - layered = 4, /* Not implemented */ + mipmap = 1, + cubemap = 2, /* Not implemented */ + layered = 3, /* Not implemented */ }; /// A struct to describe the properties of an image. diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp index 822340f264cf8..f7caddc1b5bf7 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp @@ -22,37 +22,54 @@ struct interop_mem_handle { raw_handle_type raw_handle; }; -/// External memory file descriptor type -struct external_mem_fd { +/// Opaque interop semaphore handle type +struct interop_semaphore_handle { + using raw_handle_type = pi_uint64; + raw_handle_type raw_handle; +}; + +// External resource file descriptor type +struct resource_fd { int file_descriptor; }; -/// Windows external memory type -struct external_mem_win32 { +// Windows external handle type +struct resource_win32_handle { void *handle; +}; + +// Windows external name type +struct resource_win32_name { const void *name; }; /// Opaque external memory descriptor type -template struct external_mem_descriptor { - HandleType external_handle; +template struct external_mem_descriptor { + ResourceType external_resource; size_t size_in_bytes; }; -/// Opaque interop semaphore handle type -struct interop_semaphore_handle { - using raw_handle_type = pi_uint64; - raw_handle_type raw_handle; +// Opaque external semaphore descriptor type +template struct external_semaphore_descriptor { + ResourceType external_resource; }; -/// External semaphore file descriptor type -struct external_semaphore_fd { +/// EVERYTHING BELOW IS DEPRECATED + +/// External memory file descriptor type +struct external_mem_fd { int file_descriptor; }; -/// Opaque external semaphore descriptor type -template struct external_semaphore_descriptor { - HandleType external_handle; +/// Windows external memory type +struct external_mem_win32 { + void *handle; + const void *name; +}; + +/// External semaphore file descriptor type +struct external_semaphore_fd { + int file_descriptor; }; } // namespace ext::oneapi::experimental diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 66f0c4e70c543..28e288d695b98 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,14 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 588615e90bfd2b889834120dfff172236c6b8aa8 - # Merge: 4e69cc60 47084751 - # Author: Kenneth Benzie (Benie) - # Date: Thu Feb 22 16:10:13 2024 +0000 - # Merge pull request #1371 from pbalcer/l0-query-status-sync-deadlock - # [L0] fix a deadlock in queue sync and event status query - set(UNIFIED_RUNTIME_TAG 588615e90bfd2b889834120dfff172236c6b8aa8) + set(UNIFIED_RUNTIME_REPO "https://github.com/Seanst98/unified-runtime.git") + # commit 79c28d0f0713f58358d5080653d95803fd131749 + # Merge: 25e0b603 45d76b78 + # Author: aarongreig + # Date: Fri Jan 12 16:14:44 2024 +0000 + # Merge pull request #1186 from hdelan/device-global-hip + # [HIP] Add support for global variable read write + set(UNIFIED_RUNTIME_TAG 4fc4b4f56ac25b871f52f864b4b1da2560ec0afe) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index 079d3d8d97daa..2d54a02750f18 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -466,8 +466,8 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler, } template <> -__SYCL_EXPORT interop_mem_handle import_external_memory( - external_mem_descriptor externalMem, +__SYCL_EXPORT interop_mem_handle import_external_memory( + external_mem_descriptor externalMem, const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); @@ -481,13 +481,39 @@ __SYCL_EXPORT interop_mem_handle import_external_memory( Plugin->call( C, Device, externalMem.size_in_bytes, - externalMem.external_handle.file_descriptor, &piInteropMem); + externalMem.external_resource.file_descriptor, &piInteropMem); return interop_mem_handle{piInteropMem}; } template <> -__SYCL_EXPORT interop_mem_handle import_external_memory( +__SYCL_EXPORT interop_mem_handle import_external_memory( + external_mem_descriptor externalMem, + const sycl::queue &syclQueue) { + return import_external_memory( + externalMem, syclQueue.get_device(), syclQueue.get_context()); +} + +template <> +__SYCL_EXPORT_DEPRECATED( + "import_external_memory templated by external_mem_fd is deprecated." + "Template with resource_fd instead.") +interop_mem_handle import_external_memory( + external_mem_descriptor externalMem, + const sycl::device &syclDevice, const sycl::context &syclContext) { + + external_mem_descriptor extMem; + extMem.external_resource.file_descriptor = + externalMem.external_resource.file_descriptor; + extMem.size_in_bytes = externalMem.size_in_bytes; + return import_external_memory(extMem, syclDevice, syclContext); +} + +template <> +__SYCL_EXPORT_DEPRECATED( + "import_external_memory templated by external_mem_fd is deprecated." + "Template with resource_fd instead.") +interop_mem_handle import_external_memory( external_mem_descriptor externalMem, const sycl::queue &syclQueue) { return import_external_memory( @@ -571,7 +597,7 @@ __SYCL_EXPORT void release_external_memory(interop_mem_handle interopMem, template <> __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( - external_semaphore_descriptor externalSemaphoreDesc, + external_semaphore_descriptor externalSemaphoreDesc, const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); @@ -585,7 +611,7 @@ __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( Plugin->call( - C, Device, externalSemaphoreDesc.external_handle.file_descriptor, + C, Device, externalSemaphoreDesc.external_resource.file_descriptor, &piInteropSemaphore); return interop_semaphore_handle{piInteropSemaphore}; @@ -593,6 +619,32 @@ __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( template <> __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( + external_semaphore_descriptor externalSemaphoreDesc, + const sycl::queue &syclQueue) { + return import_external_semaphore( + externalSemaphoreDesc, syclQueue.get_device(), syclQueue.get_context()); +} + +template <> +__SYCL_EXPORT_DEPRECATED("import_external_semaphore templated by " + "external_semaphore_fd is deprecated." + "Template with resource_fd instead.") +interop_semaphore_handle import_external_semaphore( + external_semaphore_descriptor externalSemaphoreDesc, + const sycl::device &syclDevice, const sycl::context &syclContext) { + + external_semaphore_descriptor extSem; + extSem.external_resource.file_descriptor = + externalSemaphoreDesc.external_resource.file_descriptor; + return import_external_semaphore(extSem, syclDevice, + syclContext); +} + +template <> +__SYCL_EXPORT_DEPRECATED("import_external_semaphore templated by " + "external_semaphore_fd is deprecated." + "Template with resource_fd instead.") +interop_semaphore_handle import_external_semaphore( external_semaphore_descriptor externalSemaphoreDesc, const sycl::queue &syclQueue) { return import_external_semaphore( diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp new file mode 100644 index 0000000000000..ea26c85718796 --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp @@ -0,0 +1,448 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %{build} %link-vulkan -o %t.out +// RUN: %{run} %t.out + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +#include + +#include "vulkan_common.hpp" + +namespace syclexp = sycl::ext::oneapi::experimental; + +struct handles_t { + syclexp::sampled_image_handle imgInput; + syclexp::image_mem_handle imgMem; + syclexp::interop_mem_handle inputInteropMemHandle; +}; + +handles_t create_handles(sycl::context &ctxt, sycl::device &dev, + const syclexp::bindless_image_sampler &samp, + int input_image_fd, syclexp::image_descriptor desc, + size_t imgSize) { + + // Extension: external memory descriptor + syclexp::external_mem_descriptor inputExtMemDesc{ + input_image_fd, imgSize}; + + // Extension: interop mem handle imported from file descriptor + syclexp::interop_mem_handle inputInteropMemHandle = + syclexp::import_external_memory(inputExtMemDesc, dev, ctxt); + + // Extension: interop mem handle imported from file descriptor + syclexp::image_mem_handle inputMappedMemHandle = + syclexp::map_external_image_memory(inputInteropMemHandle, desc, dev, + ctxt); + + // Extension: create the image and return the handle + syclexp::sampled_image_handle imgInput = + syclexp::create_image(inputMappedMemHandle, samp, desc, dev, ctxt); + + return {imgInput, inputMappedMemHandle, inputInteropMemHandle}; +} + +template +bool run_sycl(sycl::range globalSize, sycl::range localSize, + int input_image_fd, size_t mipLevels, size_t reqSize) { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Image descriptor - mapped to Vulkan image layout + syclexp::image_descriptor desc(globalSize, COrder, CType, + syclexp::image_type::mipmap, mipLevels); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::linear, 0.0f, + (float)mipLevels, 8.0f); + + const auto mip0Elems = globalSize.size(); + + auto width = globalSize[0]; + auto height = globalSize[1]; + auto depth = 1UL; + + sycl::range outBufferRange; + if constexpr (NDims == 3) { + depth = globalSize[2]; + outBufferRange = sycl::range{depth, height, width}; + } else { + outBufferRange = sycl::range{height, width}; + } + + using VecType = sycl::vec; + + auto handles = create_handles(ctxt, dev, samp, input_image_fd, desc, reqSize); + + std::vector out(mip0Elems); + try { + + sycl::buffer buf((VecType *)out.data(), outBufferRange); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.template get_access( + cgh, outBufferRange); + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + if constexpr (NDims == 3) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5f) / (float)width; + float fdim1 = float(dim1 + 0.5f) / (float)height; + float fdim2 = float(dim2 + 0.5f) / (float)depth; + + // Extension: read image data from handle (Vulkan imported) + VecType pixel1 = syclexp::read_mipmap< + std::conditional_t>( + handles.imgInput, sycl::float3(fdim0, fdim1, fdim2), 0.0f); + + VecType pixel2 = syclexp::read_mipmap< + std::conditional_t>( + handles.imgInput, sycl::float3(fdim0, fdim1, fdim2), 1.0f); + + outAcc[sycl::id{dim2, dim1, dim0}] = pixel1 + pixel2; + } else { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5f) / (float)width; + float fdim1 = float(dim1 + 0.5f) / (float)height; + + // Extension: read image data from handle (Vulkan imported) + VecType pixel1 = syclexp::read_mipmap< + std::conditional_t>( + handles.imgInput, sycl::float2(fdim0, fdim1), 0.0f); + + VecType pixel2 = syclexp::read_mipmap< + std::conditional_t>( + handles.imgInput, sycl::float2(fdim0, fdim1), 1.0f); + + outAcc[sycl::id{dim1, dim0}] = pixel1 + pixel2; + } + }); + }); + q.wait_and_throw(); + + syclexp::destroy_image_handle(handles.imgInput, dev, ctxt); + syclexp::free_image_mem(handles.imgMem, syclexp::image_type::mipmap, dev, + ctxt); + syclexp::release_external_memory(handles.inputInteropMemHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cerr << "\tKernel submission failed!" << std::endl; + exit(-1); + } + + printString("Validating\n"); + // Expected is sum of first two levels in the mipmap + // Each subsequent level repeats in each dimension + bool validated = true; + if constexpr (NDims == 3) { + for (int i = 0; i < width; ++i) { + for (int j = 0; j < height; ++j) { + for (int k = 0; k < depth; ++k) { + bool mismatch = false; + float norm_coord_x = ((i + 0.5f) / (float)width); + int x = norm_coord_x * (width >> 1); + float norm_coord_y = ((j + 0.5f) / (float)height); + int y = norm_coord_y * (height >> 1); + float norm_coord_z = ((k + 0.5f) / (float)depth); + int z = norm_coord_z * (depth >> 1); + + VecType expected = + initVector(i + width * (j + height * k)) + + initVector(x + (width / 2) * + (y + (height / 2) * z)); + + if (!equal_vec(out[i + width * (j + height * k)], + expected)) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected + << ", Actual: " << out[i + width * (j + height * k)] + << "\n"; +#else + break; +#endif + } + } + } + } + } else { + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + bool mismatch = false; + float norm_coord_x = ((i + 0.5f) / (float)width); + int x = norm_coord_x * (width >> 1); + float norm_coord_y = ((j + 0.5f) / (float)height); + int y = norm_coord_y * (height >> 1); + + VecType expected = initVector(j + (width * i)) + + initVector(y + (width / 2 * x)); + + if (!equal_vec(out[j + (width * i)], expected)) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected + << ", Actual: " << out[j + (width * i)] << "\n"; +#else + break; +#endif + } + } + } + } + if (validated) { + printString("Results are correct!\n"); + } + + return validated; +} + +template +bool run_test(sycl::range dims, sycl::range localSize, + size_t mipLevels, unsigned int seed = 0) { + + uint32_t width = static_cast(dims[0]); + uint32_t height = 1; + uint32_t depth = 1; + + size_t mip0Elems = dims[0]; + VkImageType imgType = VK_IMAGE_TYPE_1D; + + if constexpr (NDims > 1) { + mip0Elems *= dims[1]; + height = static_cast(dims[1]); + imgType = VK_IMAGE_TYPE_2D; + } + if constexpr (NDims > 2) { + mip0Elems *= dims[2]; + depth = static_cast(dims[2]); + imgType = VK_IMAGE_TYPE_3D; + } + + using VecType = sycl::vec; + VkFormat format = vkutil::to_vulkan_format(COrder, CType); + + printString("Creating input image\n"); + // Create input image memory + auto inputImage = vkutil::createImage(imgType, format, {width, height, depth}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT, + mipLevels); + VkMemoryRequirements memRequirements; + auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memRequirements); + auto inputMemory = vkutil::allocateDeviceMemory(memRequirements.size, + inputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, + 0 /*memoryOffset*/)); + + printString("Creating staging buffers\n"); + // Create input staging memory + auto inputStagingBuffer = vkutil::createBuffer( + memRequirements.size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + inputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto inputStagingMemory = vkutil::allocateDeviceMemory( + memRequirements.size, inputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, inputStagingBuffer, + inputStagingMemory, 0 /*memoryOffset*/)); + + printString("Populating staging buffer\n"); + // Populate staging memory + VecType *inputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, inputStagingMemory, 0 /*offset*/, + memRequirements.size, 0 /*flags*/, + (void **)&inputStagingData)); + + // Set input data as each mip level -- 0 -> mip size e.g. (0,1,...,63,0,1,...) + size_t offset = 0; + size_t mipElems = mip0Elems; + for (int i = 0; i < mipLevels; ++i) { + mipElems = (std::max(width >> i, (uint32_t)1) * + std::max(height >> i, (uint32_t)1) * + std::max(depth >> i, (uint32_t)1)); + for (int j = 0; j < mipElems; ++j) { + inputStagingData[j + offset] = initVector(j); + } + offset += mipElems; + } + vkUnmapMemory(vk_device, inputStagingMemory); + + printString("Submitting image layout transition\n"); + // Transition image layouts + { + VkImageMemoryBarrier barrierInput = + vkutil::createImageMemoryBarrier(inputImage, mipLevels); + + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi)); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierInput); + VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer)); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_computeCmdBuffer; + + VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); + } + + printString("Copying staging memory to images\n"); + // Copy staging to main image memory + { + VkDeviceSize currentOffset{0}; + + // Copy each mip level individually + for (int i = 0; i < mipLevels; ++i) { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {std::max(width >> i, (uint32_t)1), + std::max(height >> i, (uint32_t)1), + std::max(depth >> i, (uint32_t)1)}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + copyRegion.imageSubresource.mipLevel = i; + copyRegion.bufferOffset = currentOffset; + + currentOffset += std::max(width >> i, (uint32_t)1) * + std::max(height >> i, (uint32_t)1) * + std::max(depth >> i, (uint32_t)1) * NChannels * + sizeof(DType); + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi)); + vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], inputStagingBuffer, + inputImage, VK_IMAGE_LAYOUT_GENERAL, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0])); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[0]; + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); + } + } + + printString("Getting memory file descriptors and calling into SYCL\n"); + // Pass memory to SYCL for modification + auto input_fd = vkutil::getMemoryOpaqueFD(inputMemory); + bool result = run_sycl( + dims, localSize, input_fd, mipLevels, memRequirements.size); + + // Cleanup + vkDestroyBuffer(vk_device, inputStagingBuffer, nullptr); + vkDestroyImage(vk_device, inputImage, nullptr); + vkFreeMemory(vk_device, inputStagingMemory, nullptr); + vkFreeMemory(vk_device, inputMemory, nullptr); + + return result; +} + +bool run_tests() { + bool valid = run_test<2, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float_2d>( + {16, 16}, {2, 2}, 2, 0); + + valid &= run_test<2, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float_2d_large>( + {8, 8}, {4, 2}, 2, 0); + + valid &= run_test<3, char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class float_3d>( + {8, 8, 8}, {2, 2, 2}, 2, 0); + + valid &= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint32_2d>( + {32, 32}, {4, 2}, 2, 0); + + valid &= run_test<3, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint_3d_large>( + {8, 8, 8}, {2, 2, 4}, 2, 0); + + valid &= run_test<2, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int32_2d>({64, 64}, + {4, 2}, 2, 0); + + valid &= run_test<3, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int32_3d>( + {8, 8, 8}, {4, 2, 4}, 2, 0); + + valid &= run_test<3, int16_t, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class int16_3d>( + {32, 32, 32}, {4, 2, 4}, 2, 0); + + return valid; +} + +int main() { + + if (vkutil::setupInstance() != VK_SUCCESS) { + std::cerr << "Instance setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupDevice("NVIDIA") != VK_SUCCESS) { + std::cerr << "Device setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupCommandBuffers() != VK_SUCCESS) { + std::cerr << "Compute pipeline setup failed!\n"; + return EXIT_FAILURE; + } + + bool result_ok = run_tests(); + + if (vkutil::cleanup() != VK_SUCCESS) { + std::cerr << "Cleanup failed!\n"; + return EXIT_FAILURE; + } + + if (result_ok) { + std::cout << "All tests passed!\n"; + return EXIT_SUCCESS; + } + + std::cerr << "Test failed\n"; + return EXIT_FAILURE; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index db48fb0341f61..973f27095bf32 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -12,61 +12,21 @@ #include "vulkan_common.hpp" -#include -#include -#include -#include - -template -std::ostream &operator<<(std::ostream &os, - const sycl::vec &vec) { - std::string str{""}; - for (int i = 0; i < NChannels; ++i) { - str += std::to_string(vec[i]) + ","; - } - str.pop_back(); - os << str; - return os; -} - -template -bool equal_vec(sycl::vec v1, sycl::vec v2) { - for (int i = 0; i < NChannels; ++i) { - if (v1[i] != v2[i]) { - return false; - } - } - - return true; -} - -template -constexpr sycl::vec initVector(DType val) { - if constexpr (NChannel == 1) { - return sycl::vec{val}; - } else if constexpr (NChannel == 2) { - return sycl::vec{val, val}; - } else if constexpr (NChannel == 4) { - return sycl::vec{val, val, val, val}; - } else { - std::cerr << "unsupported number of channels " << NChannel << "\n"; - exit(-1); - } -} +namespace syclexp = sycl::ext::oneapi::experimental; struct handles_t { - sycl::ext::oneapi::experimental::sampled_image_handle imgInput; - sycl::ext::oneapi::experimental::interop_mem_handle inputInteropMemHandle; + syclexp::sampled_image_handle imgInput; + syclexp::image_mem_handle imgMem; + syclexp::interop_mem_handle inputInteropMemHandle; }; -handles_t create_test_handles( - sycl::context &ctxt, sycl::device &dev, - const sycl::ext::oneapi::experimental::bindless_image_sampler &samp, - int input_image_fd, sycl::ext::oneapi::experimental::image_descriptor desc, - const size_t imgSize) { - namespace syclexp = sycl::ext::oneapi::experimental; +handles_t create_test_handles(sycl::context &ctxt, sycl::device &dev, + const syclexp::bindless_image_sampler &samp, + int input_image_fd, + syclexp::image_descriptor desc, + const size_t imgSize) { // Extension: external memory descriptor - syclexp::external_mem_descriptor inputExtMemDesc{ + syclexp::external_mem_descriptor inputExtMemDesc{ input_image_fd, imgSize}; // Extension: interop mem handle imported from file descriptor @@ -82,7 +42,7 @@ handles_t create_test_handles( syclexp::sampled_image_handle imgInput = syclexp::create_image(inputMappedMemHandle, samp, desc, dev, ctxt); - return {imgInput, inputInteropMemHandle}; + return {imgInput, inputMappedMemHandle, inputInteropMemHandle}; } template globalSize, sycl::range localSize, sycl::queue q(dev); auto ctxt = q.get_context(); - namespace syclexp = sycl::ext::oneapi::experimental; - // Image descriptor - mapped to Vulkan image layout - syclexp::image_descriptor desc(globalSize, COrder, CType, - syclexp::image_type::interop, - 1 /*num_levels*/); + syclexp::image_descriptor desc(globalSize, COrder, CType); syclexp::bindless_image_sampler samp( sycl::addressing_mode::repeat, @@ -176,6 +132,8 @@ bool run_sycl(sycl::range globalSize, sycl::range localSize, q.wait_and_throw(); syclexp::destroy_image_handle(handles.imgInput, dev, ctxt); + syclexp::free_image_mem(handles.imgMem, syclexp::image_type::standard, dev, + ctxt); syclexp::release_external_memory(handles.inputInteropMemHandle, dev, ctxt); } catch (sycl::exception e) { std::cerr << "\tKernel submission failed! " << e.what() << std::endl; @@ -245,9 +203,11 @@ bool run_test(sycl::range dims, sycl::range localSize, auto inputImage = vkutil::createImage(imgType, format, {width, height, depth}, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_STORAGE_BIT); + VK_IMAGE_USAGE_STORAGE_BIT, + 1 /*mipLevels*/); + VkMemoryRequirements memRequirements; auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( - inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memRequirements); auto inputMemory = vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, @@ -281,7 +241,7 @@ bool run_test(sycl::range dims, sycl::range localSize, // Transition image layouts { VkImageMemoryBarrier barrierInput = - vkutil::createImageMemoryBarrier(inputImage); + vkutil::createImageMemoryBarrier(inputImage, 1 /*mipLevels*/); VkCommandBufferBeginInfo cbbi = {}; cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index f9e23244350a4..99fbcf434de16 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -12,23 +12,20 @@ #include "vulkan_common.hpp" -#include -#include #include -#include -#include + +namespace syclexp = sycl::ext::oneapi::experimental; // Helpers and utilities namespace util { struct handles_t { - sycl::ext::oneapi::experimental::interop_mem_handle - input_interop_mem_handle_1, + syclexp::interop_mem_handle input_interop_mem_handle_1, input_interop_mem_handle_2, output_interop_mem_handle; - sycl::ext::oneapi::experimental::interop_semaphore_handle - sycl_wait_interop_semaphore_handle, + syclexp::image_mem_handle input_mem_handle_1, input_mem_handle_2, + output_mem_handle; + syclexp::interop_semaphore_handle sycl_wait_interop_semaphore_handle, sycl_done_interop_semaphore_handle; - sycl::ext::oneapi::experimental::unsampled_image_handle input_1, input_2, - output; + syclexp::unsampled_image_handle input_1, input_2, output; }; handles_t @@ -37,14 +34,13 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, int output_image_fd, int sycl_wait_semaphore_fd, int sycl_done_semaphore_fd, const size_t img_size, sycl::ext::oneapi::experimental::image_descriptor &desc) { - namespace syclexp = sycl::ext::oneapi::experimental; // Extension: map the external memory descriptors - syclexp::external_mem_descriptor - input_ext_mem_desc_1{input_image_fd_1, img_size}; - syclexp::external_mem_descriptor - input_ext_mem_desc_2{input_image_fd_2, img_size}; - syclexp::external_mem_descriptor - output_ext_mem_desc{output_image_fd, img_size}; + syclexp::external_mem_descriptor input_ext_mem_desc_1{ + input_image_fd_1, img_size}; + syclexp::external_mem_descriptor input_ext_mem_desc_2{ + input_image_fd_2, img_size}; + syclexp::external_mem_descriptor output_ext_mem_desc{ + output_image_fd, img_size}; // Extension: create interop memory handles syclexp::interop_mem_handle input_interop_mem_handle_1 = @@ -74,9 +70,9 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, syclexp::create_image(output_mapped_mem_handle, desc, dev, ctxt); // Extension: import semaphores - syclexp::external_semaphore_descriptor + syclexp::external_semaphore_descriptor sycl_wait_external_semaphore_desc{sycl_wait_semaphore_fd}; - syclexp::external_semaphore_descriptor + syclexp::external_semaphore_descriptor sycl_done_external_semaphore_desc{sycl_done_semaphore_fd}; syclexp::interop_semaphore_handle sycl_wait_interop_semaphore_handle = syclexp::import_external_semaphore(sycl_wait_external_semaphore_desc, dev, @@ -88,6 +84,9 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, return {input_interop_mem_handle_1, input_interop_mem_handle_2, output_interop_mem_handle, + input_mapped_mem_handle_1, + input_mapped_mem_handle_2, + output_mapped_mem_handle, sycl_wait_interop_semaphore_handle, sycl_done_interop_semaphore_handle, input_1, @@ -96,13 +95,18 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, } void cleanup_test(sycl::context &ctxt, sycl::device &dev, handles_t handles) { - namespace syclexp = sycl::ext::oneapi::experimental; syclexp::release_external_memory(handles.input_interop_mem_handle_1, dev, ctxt); syclexp::release_external_memory(handles.input_interop_mem_handle_2, dev, ctxt); syclexp::release_external_memory(handles.output_interop_mem_handle, dev, ctxt); + syclexp::free_image_mem(handles.input_mem_handle_1, + syclexp::image_type::standard, dev, ctxt); + syclexp::free_image_mem(handles.input_mem_handle_1, + syclexp::image_type::standard, dev, ctxt); + syclexp::free_image_mem(handles.output_mem_handle, + syclexp::image_type::standard, dev, ctxt); syclexp::destroy_external_semaphore( handles.sycl_wait_interop_semaphore_handle, dev, ctxt); syclexp::destroy_external_semaphore( @@ -158,12 +162,8 @@ void run_ndim_test(sycl::range global_size, sycl::queue q(dev); auto ctxt = q.get_context(); - namespace syclexp = sycl::ext::oneapi::experimental; - // Image descriptor - mapped to Vulkan image layout - syclexp::image_descriptor desc(global_size, order, CType, - syclexp::image_type::interop, - 1 /*num_levels*/); + syclexp::image_descriptor desc(global_size, order, CType); const size_t img_size = global_size.size() * sizeof(DType) * NChannels; @@ -239,6 +239,9 @@ void run_ndim_test(sycl::range global_size, // Wait for kernel completion before destroying external objects q.wait_and_throw(); + + // Cleanup + cleanup_test(ctxt, dev, handles); } catch (sycl::exception e) { std::cerr << "\tKernel submission failed! " << e.what() << std::endl; exit(-1); @@ -316,12 +319,12 @@ bool run_test(sycl::range dims, sycl::range local_size, // Transition image layouts { VkImageMemoryBarrier barrierInput1 = - vkutil::createImageMemoryBarrier(inVkImgRes1.vkImage); + vkutil::createImageMemoryBarrier(inVkImgRes1.vkImage, 1 /*mipLevels*/); VkImageMemoryBarrier barrierInput2 = - vkutil::createImageMemoryBarrier(inVkImgRes2.vkImage); + vkutil::createImageMemoryBarrier(inVkImgRes2.vkImage, 1 /*mipLevels*/); VkImageMemoryBarrier barrierOutput = - vkutil::createImageMemoryBarrier(outVkImgRes.vkImage); + vkutil::createImageMemoryBarrier(outVkImgRes.vkImage, 1 /*mipLevels*/); VkCommandBufferBeginInfo cbbi = {}; cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp index 6e1403e762158..65eeea0f7f307 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp @@ -11,6 +11,42 @@ void printString(std::string str) { #endif } +template +std::ostream &operator<<(std::ostream &os, + const sycl::vec &vec) { + std::string str{""}; + for (int i = 0; i < NChannels; ++i) { + str += std::to_string(vec[i]) + ","; + } + str.pop_back(); + os << str; + return os; +} + +template +bool equal_vec(sycl::vec v1, sycl::vec v2) { + for (int i = 0; i < NChannels; ++i) { + if (v1[i] != v2[i]) { + return false; + } + } + return true; +} + +template +constexpr sycl::vec initVector(DType val) { + if constexpr (NChannel == 1) { + return sycl::vec{val}; + } else if constexpr (NChannel == 2) { + return sycl::vec{val, val}; + } else if constexpr (NChannel == 4) { + return sycl::vec{val, val, val, val}; + } else { + std::cerr << "unsupported number of channels " << NChannel << "\n"; + exit(-1); + } +} + #define VK_CHECK_CALL_RET(call) \ { \ VkResult err = call; \ @@ -270,13 +306,14 @@ VkBuffer createBuffer(size_t size, VkBufferUsageFlags usage) { } VkImage createImage(VkImageType type, VkFormat format, VkExtent3D extent, - VkImageUsageFlags usage, bool exportable = true) { + VkImageUsageFlags usage, size_t mipLevels, + bool exportable = true) { VkImageCreateInfo ici = {}; ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ici.imageType = type; ici.format = format; ici.extent = extent; - ici.mipLevels = 1; + ici.mipLevels = mipLevels; ici.arrayLayers = 1; // ici.tiling = VK_IMAGE_TILING_LINEAR; ici.usage = usage; @@ -323,8 +360,8 @@ VkDeviceMemory allocateDeviceMemory(size_t size, uint32_t memoryTypeIndex, return memory; } -uint32_t getImageMemoryTypeIndex(VkImage image, VkMemoryPropertyFlags flags) { - VkMemoryRequirements memRequirements; +uint32_t getImageMemoryTypeIndex(VkImage image, VkMemoryPropertyFlags flags, + VkMemoryRequirements &memRequirements) { vkGetImageMemoryRequirements(vk_device, image, &memRequirements); VkPhysicalDeviceMemoryProperties memProperties; @@ -404,7 +441,7 @@ int getSemaphoreOpaqueFD(VkSemaphore semaphore) { return fd; } -auto createImageMemoryBarrier(VkImage &img) { +auto createImageMemoryBarrier(VkImage &img, size_t mipLevels) { VkImageMemoryBarrier barrierInput = {}; barrierInput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrierInput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -413,7 +450,7 @@ auto createImageMemoryBarrier(VkImage &img) { barrierInput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrierInput.image = img; barrierInput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrierInput.subresourceRange.levelCount = 1; + barrierInput.subresourceRange.levelCount = mipLevels; barrierInput.subresourceRange.layerCount = 1; barrierInput.srcAccessMask = 0; barrierInput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -431,9 +468,11 @@ struct vulkan_image_test_resources_t { vkImage = vkutil::createImage(imgType, format, ext, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_STORAGE_BIT); + VK_IMAGE_USAGE_STORAGE_BIT, + 1); + VkMemoryRequirements memRequirements; auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( - vkImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + vkImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memRequirements); imageMemory = vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); VK_CHECK_CALL( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4fe3042f807ab..e79720066e57a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3696,6 +3696,8 @@ _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_ha _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_handleERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental22get_image_num_channelsENS3_16image_mem_handleERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental22get_image_num_channelsENS3_16image_mem_handleERKNS0_6deviceERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental22import_external_memoryINS3_11resource_fdEEENS3_18interop_mem_handleENS3_23external_mem_descriptorIT_EERKNS0_5queueE +_ZN4sycl3_V13ext6oneapi12experimental22import_external_memoryINS3_11resource_fdEEENS3_18interop_mem_handleENS3_23external_mem_descriptorIT_EERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental22import_external_memoryINS3_15external_mem_fdEEENS3_18interop_mem_handleENS3_23external_mem_descriptorIT_EERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental22import_external_memoryINS3_15external_mem_fdEEENS3_18interop_mem_handleENS3_23external_mem_descriptorIT_EERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental23prepare_for_device_copyEPKvmRKNS0_5queueE @@ -3706,6 +3708,8 @@ _ZN4sycl3_V13ext6oneapi12experimental24get_mip_level_mem_handleENS3_16image_mem_ _ZN4sycl3_V13ext6oneapi12experimental24get_mip_level_mem_handleENS3_16image_mem_handleEjRKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental24release_from_device_copyEPKvRKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental24release_from_device_copyEPKvRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental25import_external_semaphoreINS3_11resource_fdEEENS3_24interop_semaphore_handleENS3_29external_semaphore_descriptorIT_EERKNS0_5queueE +_ZN4sycl3_V13ext6oneapi12experimental25import_external_semaphoreINS3_11resource_fdEEENS3_24interop_semaphore_handleENS3_29external_semaphore_descriptorIT_EERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental25import_external_semaphoreINS3_21external_semaphore_fdEEENS3_24interop_semaphore_handleENS3_29external_semaphore_descriptorIT_EERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental25import_external_semaphoreINS3_21external_semaphore_fdEEENS3_24interop_semaphore_handleENS3_29external_semaphore_descriptorIT_EERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental25map_external_image_memoryENS3_18interop_mem_handleERKNS3_16image_descriptorERKNS0_5queueE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 435176097e7bd..b43b1731c10da 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -419,8 +419,12 @@ ??$has_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ ??$import_external_memory@Uexternal_mem_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_mem_handle@01234@U?$external_mem_descriptor@Uexternal_mem_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$import_external_memory@Uexternal_mem_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_mem_handle@01234@U?$external_mem_descriptor@Uexternal_mem_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z +??$import_external_memory@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_mem_handle@01234@U?$external_mem_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVdevice@34@AEBVcontext@34@@Z +??$import_external_memory@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_mem_handle@01234@U?$external_mem_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z ??$import_external_semaphore@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$import_external_semaphore@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z +??$import_external_semaphore@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVdevice@34@AEBVcontext@34@@Z +??$import_external_semaphore@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z ??0AccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z From 009266795d8b917ae9533b1a0c1a8f8dc91b2cd5 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Tue, 27 Feb 2024 14:00:04 +0000 Subject: [PATCH 2/8] Fix compilation issue --- .../sycl/ext/oneapi/bindless_images_descriptor.hpp | 4 ---- sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp index 417aa777f5bf6..06b73c4ea9d4a 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp @@ -156,10 +156,6 @@ struct image_descriptor { } return; - case image_type::interop: - // No checks to be made. - return; - default: // Invalid image type. throw sycl::exception(sycl::errc::invalid, diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp index ea26c85718796..052cb1994ce68 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp @@ -103,11 +103,11 @@ bool run_sycl(sycl::range globalSize, sycl::range localSize, float fdim2 = float(dim2 + 0.5f) / (float)depth; // Extension: read image data from handle (Vulkan imported) - VecType pixel1 = syclexp::read_mipmap< + VecType pixel1 = syclexp::sample_mipmap< std::conditional_t>( handles.imgInput, sycl::float3(fdim0, fdim1, fdim2), 0.0f); - VecType pixel2 = syclexp::read_mipmap< + VecType pixel2 = syclexp::sample_mipmap< std::conditional_t>( handles.imgInput, sycl::float3(fdim0, fdim1, fdim2), 1.0f); @@ -121,11 +121,11 @@ bool run_sycl(sycl::range globalSize, sycl::range localSize, float fdim1 = float(dim1 + 0.5f) / (float)height; // Extension: read image data from handle (Vulkan imported) - VecType pixel1 = syclexp::read_mipmap< + VecType pixel1 = syclexp::sample_mipmap< std::conditional_t>( handles.imgInput, sycl::float2(fdim0, fdim1), 0.0f); - VecType pixel2 = syclexp::read_mipmap< + VecType pixel2 = syclexp::sample_mipmap< std::conditional_t>( handles.imgInput, sycl::float2(fdim0, fdim1), 1.0f); From 8f5fd329cbd65da13d60432df28828ce63c888ee Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Thu, 29 Feb 2024 10:32:52 +0000 Subject: [PATCH 3/8] Update UR CMakeLists --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 28e288d695b98..d3e291f330913 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -63,7 +63,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # Date: Fri Jan 12 16:14:44 2024 +0000 # Merge pull request #1186 from hdelan/device-global-hip # [HIP] Add support for global variable read write - set(UNIFIED_RUNTIME_TAG 4fc4b4f56ac25b871f52f864b4b1da2560ec0afe) + set(UNIFIED_RUNTIME_TAG 50268cf3555ca49580662a39b3dd3f46ebe711c1) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 804b241f2eb82232a7ff3c5bb43a074e6a6eb398 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Fri, 1 Mar 2024 09:52:03 +0000 Subject: [PATCH 4/8] Fix doc conflict markers --- .../experimental/sycl_ext_oneapi_bindless_images.asciidoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 5b3c70cf3797d..6f6ea449b54f5 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -194,11 +194,7 @@ enum class image_channel_type : /* unspecified */ { enum class image_type : /* unspecified */ { standard, mipmap, -<<<<<<< HEAD -======= array, - interop, ->>>>>>> sycl }; struct image_descriptor { From cc704d9a2dd215486e47b89973f5cb7b11cfa0ff Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Fri, 1 Mar 2024 10:03:32 +0000 Subject: [PATCH 5/8] Modify unsampled test to more appropriate cleanup order --- .../vulkan_interop/unsampled_images.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index 640cd6967eeb4..703fe07fe7340 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -6,7 +6,7 @@ // RUN: %t.out // Uncomment to print additional test information -// #define VERBOSE_PRINT +#define VERBOSE_PRINT #include @@ -96,18 +96,6 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, } void cleanup_test(sycl::context &ctxt, sycl::device &dev, handles_t handles) { - syclexp::release_external_memory(handles.input_interop_mem_handle_1, dev, - ctxt); - syclexp::release_external_memory(handles.input_interop_mem_handle_2, dev, - ctxt); - syclexp::release_external_memory(handles.output_interop_mem_handle, dev, - ctxt); - syclexp::free_image_mem(handles.input_mem_handle_1, - syclexp::image_type::standard, dev, ctxt); - syclexp::free_image_mem(handles.input_mem_handle_1, - syclexp::image_type::standard, dev, ctxt); - syclexp::free_image_mem(handles.output_mem_handle, - syclexp::image_type::standard, dev, ctxt); syclexp::destroy_external_semaphore( handles.sycl_wait_interop_semaphore_handle, dev, ctxt); syclexp::destroy_external_semaphore( @@ -115,6 +103,18 @@ void cleanup_test(sycl::context &ctxt, sycl::device &dev, handles_t handles) { syclexp::destroy_image_handle(handles.input_1, dev, ctxt); syclexp::destroy_image_handle(handles.input_2, dev, ctxt); syclexp::destroy_image_handle(handles.output, dev, ctxt); + syclexp::free_image_mem(handles.input_mem_handle_1, + syclexp::image_type::standard, dev, ctxt); + syclexp::free_image_mem(handles.input_mem_handle_1, + syclexp::image_type::standard, dev, ctxt); + syclexp::free_image_mem(handles.output_mem_handle, + syclexp::image_type::standard, dev, ctxt); + syclexp::release_external_memory(handles.input_interop_mem_handle_1, dev, + ctxt); + syclexp::release_external_memory(handles.input_interop_mem_handle_2, dev, + ctxt); + syclexp::release_external_memory(handles.output_interop_mem_handle, dev, + ctxt); } template Date: Fri, 1 Mar 2024 10:07:46 +0000 Subject: [PATCH 6/8] Update UR CMakeLists --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index d3e291f330913..e1893bee0a3d7 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -63,7 +63,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # Date: Fri Jan 12 16:14:44 2024 +0000 # Merge pull request #1186 from hdelan/device-global-hip # [HIP] Add support for global variable read write - set(UNIFIED_RUNTIME_TAG 50268cf3555ca49580662a39b3dd3f46ebe711c1) + set(UNIFIED_RUNTIME_TAG 710014de6195c957a88e5c4f8996af673d3db4d4) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 2e40ea04f38426d4eb2c3ed6d6c7d3bd7a148cde Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Fri, 1 Mar 2024 13:47:31 +0000 Subject: [PATCH 7/8] Comment out verbose print in unsampled interop test Co-authored-by: DBDuncan <43582941+DBDuncan@users.noreply.github.com> --- .../bindless_images/vulkan_interop/unsampled_images.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index 703fe07fe7340..7f01281f7e678 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -6,7 +6,7 @@ // RUN: %t.out // Uncomment to print additional test information -#define VERBOSE_PRINT +// #define VERBOSE_PRINT #include From e490e8fb449991afa448d0175e2482a7899d202f Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Mon, 11 Mar 2024 13:48:45 +0000 Subject: [PATCH 8/8] Update UR CMakeLists --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0029ebe827b70..c0ca886f95edd 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -63,7 +63,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # Date: Fri Jan 12 16:14:44 2024 +0000 # Merge pull request #1186 from hdelan/device-global-hip # [HIP] Add support for global variable read write - set(UNIFIED_RUNTIME_TAG cb37f2d169b375bf15258618a2aae1a33ae2ffe7) + set(UNIFIED_RUNTIME_TAG a07b0ed5f7d263a6fbbd1c75d9a8cd18d09329a2) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")