diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index b8a658c381da8..08cc9427a2a5b 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -216,6 +216,11 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); } return (to_t##2)((to_t)from.x, (to_t)from.y); \ } +#define _DEFINE_VEC4_TO_SINGLE_CAST(from_t, to_t) \ + inline to_t cast_##from_t##4_to_##to_t(from_t##4 from) { \ + return (to_t)from[0]; \ + } + #define _DEFINE_CAST(from_t, to_t) \ inline to_t cast_##from_t##_to_##to_t(from_t from) { return (to_t)from; } @@ -278,6 +283,17 @@ _DEFINE_VEC4_TO_VEC2_CAST(float, half) _DEFINE_VEC4_TO_VEC2_CAST(int, uint) _DEFINE_VEC4_TO_VEC2_CAST(short, ushort) +_DEFINE_VEC4_TO_SINGLE_CAST(int, int) +_DEFINE_VEC4_TO_SINGLE_CAST(uint, uint) +_DEFINE_VEC4_TO_SINGLE_CAST(float, float) +_DEFINE_VEC4_TO_SINGLE_CAST(short, short) +_DEFINE_VEC4_TO_SINGLE_CAST(short, char) +_DEFINE_VEC4_TO_SINGLE_CAST(int, short) +_DEFINE_VEC4_TO_SINGLE_CAST(int, char) +_DEFINE_VEC4_TO_SINGLE_CAST(uint, ushort) +_DEFINE_VEC4_TO_SINGLE_CAST(uint, uchar) +_DEFINE_VEC4_TO_SINGLE_CAST(float, half) + _DEFINE_VEC2_CAST(int, float) _DEFINE_VEC2_CAST(short, char) _DEFINE_VEC2_CAST(short, uchar) @@ -332,6 +348,8 @@ _DEFINE_READ_3D_PIXELF(16, clamp) #undef _DEFINE_VEC4_CAST #undef _DEFINE_VEC2_CAST #undef _DEFINE_CAST +#undef _DEFINE_VEC4_TO_VEC2_CAST +#undef _DEFINE_VEC4_TO_SINGLE_CAST #undef _DEFINE_READ_1D_PIXELF #undef _DEFINE_READ_2D_PIXELF #undef _DEFINE_READ_3D_PIXELF @@ -3645,3 +3663,112 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16) #undef _NVVM_FUNC #undef NVVM_FUNC #undef MANGLE_FUNC_IMG_HANDLE_HELPER + + +// <--- CUBEMAP ---> +// Cubemap surfaces are handled through the layered images implementation + +// Define functions to call intrinsic +float4 +__nvvm_tex_cube_v4f32_f32(unsigned long, float, float, + float) __asm("__clc_llvm_nvvm_tex_cube_v4f32_f32"); +int4 __nvvm_tex_cube_v4i32_f32(unsigned long, float, float, float) __asm( + "__clc_llvm_nvvm_tex_cube_v4i32_f32"); +uint4 __nvvm_tex_cube_v4j32_f32(unsigned long, float, float, float) __asm( + "__clc_llvm_nvvm_tex_cube_v4j32_f32"); + +#define COORD_INPUT float x, float y, float z +#define COORD_THUNK_PARAMS x, y, z +#define COORD_PARAMS coord.x, coord.y, coord.z + +// Macro to generate cubemap fetches to call intrinsics +// float4, int4, uint4 already defined above +#define _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN( \ + elem_t, fetch_elem_t, vec_size, fetch_vec_size, coord_input, coord_params) \ + elem_t __nvvm_tex_cube_##vec_size##_f32(unsigned long imageHandle, \ + coord_input) { \ + fetch_elem_t a = \ + __nvvm_tex_cube_##fetch_vec_size##_f32(imageHandle, coord_params); \ + return cast_##fetch_elem_t##_to_##elem_t(a); \ + } + +// Float +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float, float4, f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float2, float4, v2f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS) +// Int +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int, int4, i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int2, int4, v2i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +// Uint +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint, uint4, j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint2, uint4, v2j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +// Short +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short, int4, i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short2, int4, v2i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short4, int4, v4i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +// UShort +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort, uint4, t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort2, uint4, v2t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort4, uint4, v4t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +// Char +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char, int4, i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char2, int4, v2i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char4, int4, v4i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS) +// UChar +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar, uint4, h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar2, uint4, v2h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar4, uint4, v4h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS) +// Half +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half, float4, f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half2, float4, v2f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half4, float4, v4f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS) + +// Macro to generate the mangled names for cubemap fetches +#define _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(elem_t, elem_t_mangled, \ + vec_size, coord_mangled, \ + coord_input, coord_params) \ + _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \ + 26, __spirv_ImageSampleCubemap, I, \ + elem_t_mangled##coord_mangled##ET0_T_T1_)(ulong imageHandle, \ + coord_input) { \ + return __nvvm_tex_cube_##vec_size##_f32(imageHandle, coord_params); \ + } + +// Float +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float, f, f32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float2, Dv2_f, v2f32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float4, Dv4_f, v4f32, Dv3_f, float3 coord, COORD_PARAMS) +// Int +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int, i, i32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int2, Dv2_i, v2i32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int4, Dv4_i, v4i32, Dv3_f, float3 coord, COORD_PARAMS) +// Uint +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint, j, j32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint2, Dv2_j, v2j32, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint4, Dv4_j, v4j32, Dv3_f, float3 coord, COORD_PARAMS) +// Short +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short, s, i16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short2, Dv2_s, v2i16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short4, Dv4_s, v4i16, Dv3_f, float3 coord, COORD_PARAMS) +// UShort +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort, t, t16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort2, Dv2_t, v2t16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort4, Dv4_t, v4t16, Dv3_f, float3 coord, COORD_PARAMS) +// Char +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char, a, i8, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char2, Dv2_a, v2i8, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char4, Dv4_a, v4i8, Dv3_f, float3 coord, COORD_PARAMS) +// UChar +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar, h, h8, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar2, Dv2_h, v2h8, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar4, Dv4_h, v4h8, Dv3_f, float3 coord, COORD_PARAMS) +// Half +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half, DF16_, f16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half2, Dv2_DF16_, v2f16, Dv3_f, float3 coord, COORD_PARAMS) +_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half4, Dv4_DF16_, v4f16, Dv3_f, float3 coord, COORD_PARAMS) + + +#undef _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN +#undef COORD_INPUT +#undef COORD_THUNK_PARAMS +#undef COORD_PARAMS +#undef _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN diff --git a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll index 0bcec642bb270..89fad3d62299b 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll +++ b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll @@ -625,3 +625,28 @@ entry: %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) ret <4 x i32> %1 } + +; <--- CUBEMAP ---> +declare {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64, float, float, float) +define <4 x float> @__clc_llvm_nvvm_tex_cube_v4f32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline { +entry: + %0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64 %img, float %x, float %y, float %z); + %1 = tail call <4 x float> @__clc_structf32_to_vector({float,float,float,float} %0) + ret <4 x float> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64, float, float, float) +define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4i32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64 %img, float %x, float %y, float %z); + %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64, float, float, float) +define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4j32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64 %img, float %x, float %y, float %z); + %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 57dba90d1f31d..4d923592bf242 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -61,6 +61,8 @@ def AspectExt_oneapi_interop_semaphore_export : Aspect<"ext_oneapi_interop_semap def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">; def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">; def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">; +def AspectExt_oneapi_cubemap : Aspect<"ext_oneapi_cubemap">; +def AspectExt_oneapi_cubemap_seamless_filtering : Aspect<"ext_oneapi_cubemap_seamless_filtering">; def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">; def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">; def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">; @@ -119,7 +121,8 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_memory_export, AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export, - AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd, + AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap, + AspectExt_oneapi_cubemap_seamless_filtering, AspectExt_intel_esimd, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence], diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index b1a067e33b6ae..1301e7d6f7f87 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -195,6 +195,7 @@ enum class image_type : /* unspecified */ { standard, mipmap, array, + cubemap, }; struct image_descriptor { @@ -232,14 +233,15 @@ struct image_descriptor { The image descriptor represents the image dimensions, channel type, and channel order. An `image_type` member is also present to allow for implementation of -mipmapped images. +mipmapped, image array, and cubemapped images. The `image_descriptor` shall be default constructible and follow by-value semantics. [NOTE] ==== -Additional future `image_type`s _may_ include "cubemap". +Additional future `image_type`s _may_ include combined image types like +"mipmapped cubemap". ==== Note that `image_channel_type` and `image_channel_order` existed in SYCL 1.2.1, @@ -655,12 +657,18 @@ through `free_image_mem`, or destroying the `image_mem` object, if one was used. The `bindless_image_sampler` struct shown below is used to set the sampling properties of `sampled_images` upon image creation. It can be used to set sampling properties that exist in the SYCL 2020 `image_sampler` as well as -extra properties used for sampling mipmaps including level-of-detail (LOD) and -anisotropic filtering. +extra properties used for sampling additional image types including +level-of-detail (LOD) and anisotropic filtering for mipmaps, and seamless +filtering for cubemaps. ```cpp namespace sycl::ext::oneapi::experimental { +enum class cubemap_filtering_mode : /* unspecified */ { + disjointed, + seamless, +}; + struct bindless_image_sampler { // Assign addressing mode to all dimensions @@ -675,6 +683,11 @@ struct bindless_image_sampler { float minMipmapLevelClamp, float maxMipmapLevelClamp, float maxAnisotropy); + bindless_image_sampler(sycl::addressing_mode addressing, + sycl::coordinate_normalization_mode coordinate, + sycl::filtering_mode filtering, + cubemap_filtering_mode cubemapFiltering); + // Specific addressing modes per dimension bindless_image_sampler(sycl::addressing_mode addressing[3], sycl::coordinate_normalization_mode coordinate, @@ -686,7 +699,11 @@ struct bindless_image_sampler { sycl::filtering_mode mipmapFiltering, float minMipmapLevelClamp, float maxMipmapLevelClamp, float maxAnisotropy); - + + bindless_image_sampler(sycl::addressing_mode addressing[3], + sycl::coordinate_normalization_mode coordinate, + sycl::filtering_mode filtering, + cubemap_filtering_mode cubemapFiltering); sycl::addressing_mode addressing[3] = {sycl::addressing_mode::none}; sycl::coordinate_normalization_mode coordinate = @@ -696,6 +713,8 @@ struct bindless_image_sampler { float min_mipmap_level_clamp = 0.f; float max_mipmap_level_clamp = 0.f; float max_anisotropy = 0.f; + ext::oneapi::experimental::cubemap_filtering_mode cubemap_filtering = + cubemap_filtering_mode::disjointed; }; } @@ -722,6 +741,19 @@ sample. This value cannot be higher than the number of allocated levels. `max_anisotropy` dictates the anisotropic ratio used when sampling the mipmap with anisotropic filtering. +`cubemap_filtering` dictates the method of sampling along cubemap face borders. +Disjointed indicates no sampling between faces whereas seamless indicates that +sampling across face boundaries is enabled. + +[NOTE] +==== +In CUDA, when seamless cubemap filtering is enabled, sampled image address modes +specified are ignored. Instead, if the `filtering` mode is set to `nearest` the +address mode `clamp_to_edge` will be applied for all dimensions. If the +`filtering` mode is set to `linear` then seamless cubemap filtering will be +performed when sampling along the cube face borders. +==== + === Explicit copies ```cpp @@ -1004,7 +1036,7 @@ DataT sample_image(const sampled_image_handle &ImageHandle, const CoordT &Coords); template -void write_image(unsampled_image_handle &ImageHandle, +void write_image(unsampled_image_handle ImageHandle, const CoordT &Coords, const DataT &Color); } ``` @@ -1262,7 +1294,7 @@ functions which take `image_descriptor` that has `image_type::array` and Currently there is no support for sampled image arrays. ==== -=== Copying image array data +=== Copying image array data [[copying_image_array_data]] When copying to or from image arrays, the user should copy to/from the entire array of images in one call to `ext_oneapi_copy` by passing the image arrays' @@ -1303,9 +1335,9 @@ provided that type is trivially copyable. ```c++ // Write to an unsampled image array template -DataT write_image_array(const unsampled_image_handle &ImageHandle, - const CoordT &Coords, const unsigned int ArrayLayer - const DataT &Color); +DataT write_image_array(unsampled_image_handle ImageHandle, + const CoordT &Coords, const unsigned int ArrayLayer + const DataT &Color); ``` Writing to an image array follows the same restrictions on what coordinate types @@ -1317,6 +1349,119 @@ Attempting to write to an image array with `write_image` or any other defined write function will result in undefined behaviour. ==== +== Cubemapped images + +Another image type this extension supports is cubemapped images. Cubemap images +are a specialisation of 2D image arrays that have exactly six layers +representing the faces of a cube where the width and height of each layer (cube +face) are equal. Cube mapping is a method of environment mapping, where the +environment is projected onto the sides of the cube. Cubemaps have been applied +in graphical systems such as skylight illumination, dynamic reflection, and +skyboxes. + +=== Querying cubemap support + +We provide the following device aspects to retrieve support information on a +SYCL implementation of just a couple of cubemap features. + +The device aspect descriptors for these queries are: + +[frame="none",options="header"] +|====================== +|Device descriptor |Description +|`aspect::ext_oneapi_cubemap` | Indicates if the device supports allocating +and accessing cubemap resources +|`aspect::ext_oneapi_cubemap_seamless_filtering` | Indicates if the device +supports sampling cubemapped images across face bounderies +|====================== + + +=== Allocation of cubemapped images + +As with all other image types, cubemap memory is allocated through +`alloc_image_mem` with the appropriately populated `image_descriptor`, where +`width` and `height` are equal, and the type is set to `image_type::cubemap`. +Since cubemaps are specialised image arrays, the `array_size` must be populated +with the only valid value, 6. Overriding this with any other value for +`array_size` could result in an exception or undefined behaviour. Cubemaps are +not supported with USM backed memory. + +Cubemap memory allocated this way requires the user to free that memory after +all operations using the memory are completed and no more operations operating +on the memory will be scheduled. This is done using `free_image_mem`, passing +`image_type::cubemap`. + +The RAII class `image_mem` may also be used to perform allocation and +deallocation of cubemapped device memory. The constructor and destructor act as +a wrapper for the functions `alloc_image_mem` and `free_image_mem` respectively. + +=== Obtaining a handle to a cubemap + +A handle to a cubemap is acquired in the same way as a `standard` image for both +an `unsampled_image_handle` and `sampled_image_handle`. We create the handle for +a cubemap through the appropriate `create_image` functions which take the +`image_descriptor` and `bindless_image_sampler` for a `sampled_image_handle`, or +just the `image_descriptor` for an `unsampled_image_handle`. + +As with allocation, the descriptor must be populated appropriately, i.e. +`image_type::cubemap`, `width` and `height` are equal, and `array_size` is equal +to 6. To sample a cubemap as expected, the cubemap sampling attribute of the +sampler, namely `seamless_filtering_mode`, must be defined. + +=== Copying cubemap image data + +In order to copy to or from cubemaps, the user should utilise the provided +`ext_oneapi_copy` functions following the details laid out in +<>. + +=== Reading, writing, and sampling a cubemap + +Cubemaps are supported as both unsampled and sampled images, however, the +meaning of their usage is quite different. + +An unsampled cubemap is treated as an image array with six layers, i.e. an +integer index denoting a face and two integer coordinates addressing a texel +within the layer corresponding to this face. Inside the kernel, this is done via +`fetch_cubemap`, passing the `unsampled_image_handle`, the integer coordinates, +`int2`, and an integer index denoting the face, `int`. Being an unsampled image, +a cubemap can be written with `write_cubemap`, passing the +`unsampled_image_handle`, the integer coordinates, `int2`, and an integer index +denoting the face, `int`. + +On the other hand, a sampled cubemap is addressed using three floating-point +coordinates `x`, `y`, and `z` that are interpreted as a direction vector +emanating from the centre of the cube and pointing to one face of the cube and a +texel within the layer corresponding to that face. Inside the kernel, this is +done via `sample_cubemap`, passing the `sampled_image_handle`, the +floating-point coordinates `x`, `y`, and `z`, as a `float3`. The method of +sampling depends on the sampler attributes passed upon creation of the cubemap. + +```c++ +// Unsampled cubemap read +template +DataT fetch_cubemap(const unsampled_image_handle &ImageHandle, + const int2 &Coords, + const int Face); + +// Sampled cubemap read +template +DataT sample_cubemap(const sampled_image_handle &ImageHandle, + const float3 &Vec); + +// Unsampled cubemap write +template +void write_cubemap(unsampled_image_handle ImageHandle, + const int2 &Coords, + const int Face, + const DataT &Color); +``` + +[NOTE] +==== +Attempting to read or write to a cubemap with any other defined read/write +function will result in undefined behaviour. +==== + == Interoperability === Querying interoperability support @@ -2005,6 +2150,137 @@ if (validated) { return 1; ``` +=== Sampling a cubemap + +```c++ +#include +#include + +int main() { + + namespace syclexp = sycl::ext::oneapi::experimental; + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + // width and height must be equal + size_t width = 8; + size_t height = 8; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N * 6); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < 6; k++) { + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + + int j = 0; + for (int i = N - 1; i >= 0; i--) { + expected[j] = (float)i; + j++; + } + + // Extension: image descriptor - Cubemap + syclexp::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, syclexp::image_type::cubemap, 1, 6); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::clamp_to_edge, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, syclexp::cubemap_filtering_mode::seamless); + + try { + // Extension: allocate memory on device and create the handle + syclexp::image_mem imgMem(desc, dev, ctxt); + + // Extension: create the image and return the handle + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, samp, desc, dev, ctxt); + + // Extension: copy over data to device (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), imgMem.get_handle(), desc); + }); + q.wait_and_throw(); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + // Emanating vector scans one face + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Direction Vector + // x -- largest magnitude + // y -- shifted between [-0.99, 0.99] + offset + // z -- shifted between [-0.99, 0.99] + offset + // + // [-0.99, 0.99] -- maintains x as largest magnitude + // + // 4 elems == [-1, -0.5, 0, 0.5] -- need offset to bring uniformity + // +0.25 = [-0.75, -0.25, 0.25, 0.75] + float fdim0 = 1.f; + float fdim1 = (((float(dim0) / (float)width) * 1.98) - 0.99) + + (1.f / (float)width); + float fdim2 = (((float(dim1) / (float)height) * 1.98) - 0.99) + + (1.f / (float)height); + + // Extension: read texture cubemap data from handle + sycl::float4 px = syclexp::sample_cubemap( + imgHandle, sycl::float3(fdim0, fdim1, fdim2)); + + outAcc[sycl::id<2>{dim0, dim1}] = px[0]; + }); + }); + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + if (mismatch) { + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} +``` + === Using imported memory and semaphore objects ```c++ @@ -2187,7 +2463,6 @@ There are dimension specific limitations: These features still need to be handled: * Level Zero and SPIR-V support -* Cubemap images == Revision History @@ -2328,4 +2603,13 @@ These features still need to be handled: - `image_descriptor::verify()` member function added. |5.5|2024-02-27| - Update interop with mipmap interop and slight redesign - `interop` removed from `image_type` +|5.6|2024-03-04| - Added cubemap support. + - Allocation of cubemaps. + - Creation of cubemaps. + - Fetching/writing of unsampled cubemaps and sampling cubemaps. + - `image_type::cubemap` added to enum. + - Cubemap example. + - Updated `image_array_write` with non-const handle parameter. + - Removed `&` reference qualifier from `write_xxx` handle + parameter. |====================== diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 5dcb2ff056921..caa50f4a53084 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -221,6 +221,10 @@ template extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT); +template +extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleCubemap(SampledType, + TempArgT); + #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 93d557e509f47..e5d52af04c916 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -104,6 +104,18 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, Img, TmpCoords, ArrayLayer, TmpVal); } +template +static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) { + + // Convert from sycl types to builtin types to get correct function mangling. + using TempRetT = sycl::detail::ConvertToOpenCLType_t; + auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec); + + return sycl::detail::convertFromOpenCLTypeFor( + __spirv_ImageSampleCubemap( + SmpImg, TmpDirVec)); +} + template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 3c9076e09f66b..816b7d7e58ec3 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -156,9 +156,17 @@ // piextEnqueueCooperativeKernelLaunch. // 15.46 Add piextGetGlobalVariablePointer // 15.47 Added PI_ERROR_FEATURE_UNSUPPORTED. +// 15.49 Added cubemap support: +// - Added cubemap image type, PI_MEM_TYPE_IMAGE_CUBEMAP, to _pi_mem_type +// - Added cubemap sampling capabilities +// - _pi_sampler_cubemap_filter_mode +// - PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE +// - Added device queries for cubemap support +// - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 47 +#define _PI_H_VERSION_MINOR 49 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -445,6 +453,10 @@ typedef enum { // Composite device PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES = 0x20111, PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE = 0x20112, + + // Bindless images cubemaps + PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT = 0x20115, + PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT = 0x20116, } _pi_device_info; typedef enum { @@ -575,7 +587,8 @@ typedef enum { PI_MEM_TYPE_IMAGE2D_ARRAY = 0x10F3, PI_MEM_TYPE_IMAGE1D = 0x10F4, PI_MEM_TYPE_IMAGE1D_ARRAY = 0x10F5, - PI_MEM_TYPE_IMAGE1D_BUFFER = 0x10F6 + PI_MEM_TYPE_IMAGE1D_BUFFER = 0x10F6, + PI_MEM_TYPE_IMAGE_CUBEMAP = 0x10F7, } _pi_mem_type; typedef enum { @@ -690,6 +703,11 @@ typedef enum { PI_SAMPLER_FILTER_MODE_LINEAR = 0x1141, } _pi_sampler_filter_mode; +typedef enum { + PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED = 0x1142, + PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, +} _pi_sampler_cubemap_filter_mode; + using pi_context_properties = intptr_t; using pi_device_exec_capabilities = pi_bitfield; @@ -704,6 +722,8 @@ constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = 0x1153; constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = 0x1154; constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE = 0x1155; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE = + 0x1156; using pi_memory_order_capabilities = pi_bitfield; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED = 0x01; @@ -812,6 +832,7 @@ using pi_image_channel_type = _pi_image_channel_type; using pi_buffer_create_type = _pi_buffer_create_type; using pi_sampler_addressing_mode = _pi_sampler_addressing_mode; using pi_sampler_filter_mode = _pi_sampler_filter_mode; +using pi_sampler_cubemap_filter_mode = _pi_sampler_cubemap_filter_mode; using pi_sampler_info = _pi_sampler_info; using pi_event_status = _pi_event_status; using pi_program_build_info = _pi_program_build_info; diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index edd10c23b6099..bece91ffe52e1 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -323,6 +323,16 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_intel_fpga_task_sequence__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cubemap__ +// __SYCL_ASPECT(ext_oneapi_cubemap, 65) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cubemap__ 0 +#endif + +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cubemap_seamless_filtering__ +// __SYCL_ASPECT(ext_oneapi_cubemap_seamless_filtering, 66) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cubemap_seamless_filtering__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -637,3 +647,13 @@ // __SYCL_ASPECT(ext_intel_fpga_task_sequence__, 62) #define __SYCL_ANY_DEVICE_HAS_ext_intel_fpga_task_sequence__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_cubemap__ +// __SYCL_ASPECT(ext_oneapi_cubemap, 65) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_cubemap__ 0 +#endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_cubemap_seamless_filtering__ +// __SYCL_ASPECT(ext_oneapi_cubemap_seamless_filtering, 66) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_cubemap_seamless_filtering__ 0 +#endif diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index ee8672c14709c..c01e714991f27 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1145,6 +1145,54 @@ DataT fetch_image_array(const unsampled_image_handle &imageHandle #endif } +/** + * @brief Fetch data from an unsampled cubemap image using its handle + * + * @tparam DataT The return type + * + * @param imageHandle The image handle + * @param coords The coordinates at which to fetch image data (int2 only) + * @param face The cubemap face at which to fetch + * @return Image data + */ +template +DataT fetch_cubemap(const unsampled_image_handle &imageHandle, + const int2 &coords, const unsigned int face) { + return fetch_image_array(imageHandle, coords, face); +} + +/** + * @brief Sample a cubemap image using its handle + * + * @tparam DataT The return type + * + * @param imageHandle The image handle + * @param dirVec The direction vector at which to sample image data (float3 + * only) + * @return Image data + */ +template +DataT sample_cubemap(const sampled_image_handle &imageHandle [[maybe_unused]], + const sycl::float3 &dirVec [[maybe_unused]]) { + +#ifdef __SYCL_DEVICE_ONLY__ + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageReadCubemap(imageHandle.raw_handle, + dirVec); + } else { + static_assert(sizeof(HintT) == sizeof(DataT), + "When trying to read a user-defined type, HintT must be of " + "the same size as the user-defined DataT."); + static_assert(detail::is_recognized_standard_type(), + "HintT must always be a recognized standard type"); + return sycl::bit_cast(__invoke__ImageReadCubemap( + imageHandle.raw_handle, dirVec)); + } +#else + assert(false); // Bindless images not yet implemented on host +#endif +} + /** * @brief Write to an unsampled image using its handle * @@ -1156,7 +1204,7 @@ DataT fetch_image_array(const unsampled_image_handle &imageHandle * @param color The data to write */ template -void write_image(const unsampled_image_handle &imageHandle [[maybe_unused]], +void write_image(unsampled_image_handle imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], const DataT &color [[maybe_unused]]) { detail::assert_unsampled_coords(); @@ -1191,8 +1239,7 @@ void write_image(const unsampled_image_handle &imageHandle [[maybe_unused]], * @param color The data to write */ template -void write_image_array(const unsampled_image_handle &imageHandle - [[maybe_unused]], +void write_image_array(unsampled_image_handle imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], const int arrayLayer [[maybe_unused]], const DataT &color [[maybe_unused]]) { @@ -1217,6 +1264,22 @@ void write_image_array(const unsampled_image_handle &imageHandle #endif } +/** + * @brief Write to an unsampled cubemap using its handle + * + * @tparam DataT The data type to write + * + * @param imageHandle The image handle + * @param coords The coordinates at which to write image data (int2 only) + * @param face The cubemap face at which to write + * @param color The data to write + */ +template +void write_cubemap(unsampled_image_handle imageHandle, const sycl::int2 &coords, + const int face, const DataT &color) { + return write_image_array(imageHandle, coords, face, color); +} + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp index 06b73c4ea9d4a..88710876e7eca 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp @@ -26,7 +26,7 @@ enum class image_type : unsigned int { standard = 0, mipmap = 1, array = 2, - cubemap = 3, /* Not implemented */ + cubemap = 3, }; /// A struct to describe the properties of an image. @@ -156,10 +156,28 @@ struct image_descriptor { } return; - default: - // Invalid image type. - throw sycl::exception(sycl::errc::invalid, - "Invalid image descriptor image type"); + case image_type::cubemap: + if (this->array_size != 6) { + // Cubemaps must have an array size of 6. + throw sycl::exception(sycl::errc::invalid, + "Cubemap images must have array_size of 6 only! " + "Use image_type::array instead."); + } + if (this->depth != 0 || this->height == 0 || + this->width != this->height) { + // Cubemaps must be 2D + throw sycl::exception( + sycl::errc::invalid, + "Cubemap images must be square with valid and equivalent width and " + "height! Use image_type::array instead."); + } + if (this->num_levels != 1) { + // Cubemaps cannot be mipmaps. + throw sycl::exception(sycl::errc::invalid, + "Cannot have mipmap cubemaps! Either num_levels " + "or array_size must be 1."); + } + return; } } }; diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp index 5d225e0ad3cc8..f29d387ed575c 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp @@ -14,23 +14,28 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { +/// cubemap filtering mode enum +enum class cubemap_filtering_mode : unsigned int { + disjointed = PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED, + seamless = PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS, +}; + struct bindless_image_sampler { - bindless_image_sampler(sycl::addressing_mode addressing[3], + bindless_image_sampler(sycl::addressing_mode addr[3], sycl::coordinate_normalization_mode coordinate, sycl::filtering_mode filtering) - : addressing{addressing[0], addressing[1], addressing[2]}, - coordinate(coordinate), filtering(filtering) {} + : addressing{addr[0], addr[1], addr[2]}, coordinate(coordinate), + filtering(filtering) {} - bindless_image_sampler(sycl::addressing_mode addressing[3], + bindless_image_sampler(sycl::addressing_mode addr[3], sycl::coordinate_normalization_mode coordinate, sycl::filtering_mode filtering, sycl::filtering_mode mipmapFiltering, float minMipmapLevelClamp, float maxMipmapLevelClamp, float maxAnisotropy) - : addressing{addressing[0], addressing[1], addressing[2]}, - coordinate(coordinate), filtering(filtering), - mipmap_filtering(mipmapFiltering), + : addressing{addr[0], addr[1], addr[2]}, coordinate(coordinate), + filtering(filtering), mipmap_filtering(mipmapFiltering), min_mipmap_level_clamp(minMipmapLevelClamp), max_mipmap_level_clamp(maxMipmapLevelClamp), max_anisotropy(maxAnisotropy) {} @@ -55,11 +60,26 @@ struct bindless_image_sampler { max_mipmap_level_clamp(maxMipmapLevelClamp), max_anisotropy(maxAnisotropy) {} + bindless_image_sampler(sycl::addressing_mode addr, + sycl::coordinate_normalization_mode coordinate, + sycl::filtering_mode filtering, + cubemap_filtering_mode cubemapFiltering) + : addressing{addr, addr, addr}, coordinate(coordinate), + filtering(filtering), cubemap_filtering(cubemapFiltering) {} + + bindless_image_sampler(sycl::addressing_mode addr[3], + sycl::coordinate_normalization_mode coordinate, + sycl::filtering_mode filtering, + cubemap_filtering_mode cubemapFiltering) + : addressing{addr[0], addr[1], addr[2]}, coordinate(coordinate), + filtering(filtering), cubemap_filtering(cubemapFiltering) {} + sycl::addressing_mode addressing[3] = {sycl::addressing_mode::none}; sycl::coordinate_normalization_mode coordinate = sycl::coordinate_normalization_mode::unnormalized; sycl::filtering_mode filtering = sycl::filtering_mode::nearest; sycl::filtering_mode mipmap_filtering = sycl::filtering_mode::nearest; + cubemap_filtering_mode cubemap_filtering = cubemap_filtering_mode::disjointed; float min_mipmap_level_clamp = 0.f; float max_mipmap_level_clamp = 0.f; float max_anisotropy = 0.f; diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 4324015822c17..a059a52110110 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -57,3 +57,5 @@ __SYCL_ASPECT(ext_oneapi_is_composite, 59) __SYCL_ASPECT(ext_oneapi_is_component, 60) __SYCL_ASPECT(ext_oneapi_graph, 61) __SYCL_ASPECT(ext_intel_fpga_task_sequence, 62) +__SYCL_ASPECT(ext_oneapi_cubemap, 65) +__SYCL_ASPECT(ext_oneapi_cubemap_seamless_filtering, 66) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 52bbfcb923136..305b2a1b182c6 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,12 +57,12 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 410c02365eb8505ace69d3cb6b8192dbe0077161 - # Author: Kenneth Benzie (Benie) - # Date: Thu Apr 4 10:23:33 2024 +0200 - # Merge pull request #1448 from steffenlarsen/steffen/make_ext_func_fail_unsupported - # [OpenCL] Make extension function lookup return unusupported error - set(UNIFIED_RUNTIME_TAG 410c02365eb8505ace69d3cb6b8192dbe0077161) + # commit 26cc04e258b82696bfd7738d08d95e34db4aecf6 + # Author: aarongreig + # Date: Mon Apr 8 09:52:16 2024 +0100 + # Merge pull request #1433 from Seanst98/sean/cubemaps-UR + # [Bindless][CUDA] Add support for cubemaps + set(UNIFIED_RUNTIME_TAG 26cc04e258b82696bfd7738d08d95e34db4aecf6) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index f13e11334748d..624687c398a6a 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1264,6 +1264,11 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT, UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT, + UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT, + UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP) PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP) @@ -2947,6 +2952,8 @@ static void pi2urImageDesc(const pi_image_format *ImageFormat, UR_MEM_TYPE_IMAGE1D_ARRAY) PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D_BUFFER, UR_MEM_TYPE_IMAGE1D_BUFFER) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE_CUBEMAP, + UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) #undef PI_TO_UR_MAP_IMAGE_TYPE default: { die("piMemImageCreate: unsuppported image_type."); @@ -4974,8 +4981,12 @@ inline pi_result piextBindlessImageSamplerCreate( ur_exp_sampler_addr_modes_t UrAddrModes{}; UrAddrModes.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES; UrMipProps.pNext = &UrAddrModes; - int addrIndex = 0; + + ur_exp_sampler_cubemap_properties_t UrCubemapProps{}; + UrCubemapProps.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES; + UrAddrModes.pNext = &UrCubemapProps; + const pi_sampler_properties *CurProperty = SamplerProperties; while (*CurProperty != 0) { switch (*CurProperty) { @@ -5026,6 +5037,20 @@ inline pi_result piextBindlessImageSamplerCreate( UrMipProps.mipFilterMode = UR_SAMPLER_FILTER_MODE_LINEAR; } break; + case PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE: { + pi_sampler_cubemap_filter_mode CurValueFilterMode = + ur_cast( + ur_cast(*(++CurProperty))); + + if (CurValueFilterMode == PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) + UrCubemapProps.cubemapFilterMode = + UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS; + else if (CurValueFilterMode == PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED) + UrCubemapProps.cubemapFilterMode = + UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED; + + } break; + default: break; } diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index f16437cb0cd7a..9971712f0cfdd 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -30,9 +30,11 @@ void populate_pi_structs(const image_descriptor &desc, pi_image_desc &piDesc, piDesc.image_depth = desc.depth; if (desc.array_size > 1) { - // Image array. - piDesc.image_type = - desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + // Image array or cubemap + piDesc.image_type = desc.type == image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY + : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { piDesc.image_type = desc.depth > 0 @@ -266,7 +268,8 @@ __SYCL_EXPORT void free_image_mem(image_mem_handle memHandle, sycl::detail::PiApiKind::piextMemMipmapFree>( C, Device, memHandle.raw_handle); } else if (imageType == image_type::standard || - imageType == image_type::array) { + imageType == image_type::array || + imageType == image_type::cubemap) { Plugin->call( C, Device, memHandle.raw_handle); @@ -431,6 +434,8 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler, static_cast(sampler.filtering), PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE, static_cast(sampler.mipmap_filtering), + PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE, + static_cast(sampler.cubemap_filtering), 0}; pi_sampler piSampler = {}; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7028b16d0d338..35199933b8784 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -546,6 +546,23 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_cubemap: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT, sizeof(pi_bool), + &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } + case aspect::ext_oneapi_cubemap_seamless_filtering: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, + PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } case aspect::ext_intel_esimd: { pi_bool support = PI_FALSE; bool call_successful = diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5b40f1b3b07c3..89968e06d1a32 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1029,6 +1029,12 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = Desc.depth > 0 @@ -1076,6 +1082,12 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + DestImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = DestImgDesc.depth > 0 ? PI_MEM_TYPE_IMAGE3D @@ -1121,6 +1133,12 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = Desc.depth > 0 @@ -1168,6 +1186,12 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + SrcImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = SrcImgDesc.depth > 0 ? PI_MEM_TYPE_IMAGE3D @@ -1213,6 +1237,12 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = Desc.depth > 0 @@ -1262,6 +1292,13 @@ void handler::ext_oneapi_copy( // Image Array. PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + PiDesc.image_type = + DeviceImgDesc.type == + sycl::ext::oneapi::experimental::image_type::cubemap + ? PI_MEM_TYPE_IMAGE_CUBEMAP + : PiDesc.image_type; } else { PiDesc.image_type = DeviceImgDesc.depth > 0 ? PI_MEM_TYPE_IMAGE3D diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp new file mode 100644 index 0000000000000..d2ba5da27d5f3 --- /dev/null +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -0,0 +1,141 @@ +// REQUIRES: linux,cuda,aspect-ext_oneapi_cubemap +// REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// Uncomment to print additional test information. +// #define VERBOSE_PRINT + +class sample_cubemap; + +int main() { + + namespace syclexp = sycl::ext::oneapi::experimental; + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Declare image data. + // Width and height must be equal. + size_t width = 8; + size_t height = 8; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N * 6); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < 6; k++) { + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + + int j = 0; + for (int i = N - 1; i >= 0; i--) { + expected[j] = static_cast(i); + j++; + } + + // Extension: image descriptor - Cubemap. + syclexp::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, syclexp::image_type::cubemap, 1, 6); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::clamp_to_edge, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, syclexp::cubemap_filtering_mode::seamless); + + try { + // Extension: allocate memory on device and create the handle. + syclexp::image_mem imgMem(desc, dev, ctxt); + + // Extension: create the image and return the handle. + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, samp, desc, dev, ctxt); + + // Extension: copy over data to device (handler variant). + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), imgMem.get_handle(), desc); + }); + q.wait_and_throw(); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + // Emanating vector scans one face. + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Direction Vector + // x -- largest magnitude + // y -- shifted between [-0.99, 0.99] + offset + // z -- shifted between [-0.99, 0.99] + offset + // + // [-0.99, 0.99] -- maintains x as largest magnitude + // + // 4 elems == [-1, -0.5, 0, 0.5] -- need offset to bring uniformity + // +0.25 = [-0.75, -0.25, 0.25, 0.75] + float fdim0 = 1.f; + float fdim1 = (((float(dim0) / (float)width) * 1.98) - 0.99) + + (1.f / (float)width); + float fdim2 = (((float(dim1) / (float)height) * 1.98) - 0.99) + + (1.f / (float)height); + + // Extension: sample cubemap data from handle. + sycl::float4 px = syclexp::sample_cubemap( + imgHandle, sycl::float3(fdim0, fdim1, fdim2)); + + outAcc[sycl::id<2>{dim0, dim1}] = px[0]; + }); + }); + q.wait_and_throw(); + + // Extension: cleanup. + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // Collect and validate output. + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp new file mode 100644 index 0000000000000..3b2ba56f4acfb --- /dev/null +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp @@ -0,0 +1,293 @@ +// REQUIRES: linux,cuda,aspect-ext_oneapi_cubemap + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include "../bindless_helpers.hpp" +#include +#include +#include +#include + +static sycl::device dev; + +// Uncomment to print additional test information. +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +// Helpers and utilities. +struct util { + template + static void run_ndim_test(sycl::queue q, sycl::range<3> globalSize, + sycl::range<3> localSize, + syclexp::unsampled_image_handle input_0, + syclexp::unsampled_image_handle input_1, + syclexp::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>{globalSize, localSize}, [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + + if constexpr (NChannels >= 1) { + VecType px1 = syclexp::fetch_cubemap( + input_0, sycl::int2(dim0, dim1), int(dim2)); + VecType px2 = syclexp::fetch_cubemap( + input_1, sycl::int2(dim0, dim1), int(dim2)); + + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); + syclexp::write_cubemap(output, sycl::int2(dim0, dim1), + int(dim2), VecType(sum)); + } else { + DType px1 = syclexp::fetch_cubemap( + input_0, sycl::int2(dim0, dim1), int(dim2)); + DType px2 = syclexp::fetch_cubemap( + input_1, sycl::int2(dim0, dim1), int(dim2)); + + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); + syclexp::write_cubemap(output, sycl::int2(dim0, dim1), + int(dim2), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } +}; + +template +bool run_test(sycl::range<2> dims, sycl::range<3> localSize, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::queue q(dev); + + // Skip half tests if not supported. + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims.size() * 6; + + std::vector input_0(num_elems); + std::vector input_1(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + bindless_helpers::fill_rand(input_0, seed); + bindless_helpers::fill_rand(input_1, seed); + bindless_helpers::add_host(input_0, input_1, expected); + + try { + syclexp::image_descriptor desc(dims, COrder, CType, + syclexp::image_type::cubemap, 1, 6); + + // Extension: allocate memory on device and create the handle. + syclexp::image_mem img_mem_0(desc, q); + syclexp::image_mem img_mem_1(desc, q); + syclexp::image_mem img_mem_2(desc, q); + + auto img_input_0 = syclexp::create_image(img_mem_0, desc, q); + auto img_input_1 = syclexp::create_image(img_mem_1, desc, q); + auto img_output = syclexp::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device. + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(input_1.data(), img_mem_1.get_handle(), desc); + q.wait(); + { + sycl::range<3> globalSize = {dims[0], dims[1], 6}; + util::run_ndim_test( + q, globalSize, localSize, img_input_0, img_input_1, img_output); + q.wait(); + q.ext_oneapi_copy(img_mem_2.get_handle(), actual.data(), desc); + q.wait(); + } + + // Cleanup. + syclexp::destroy_image_handle(img_input_0, q); + syclexp::destroy_image_handle(img_input_1, q); + syclexp::destroy_image_handle(img_output, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // Collect and validate output. + bool validated = true; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +expected[i][j] + << ", Actual: " << +actual[i][j] << std::endl; +#else + break; +#endif + } + } + } +#ifdef VERBOSE_PRINT + if (validated) { + std::cout << "\tTest passed!" << std::endl; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + unsigned int seed = 0; + bool failed = false; + + printTestName("Running cube int\n"); + failed |= run_test( + {32, 32}, {16, 16, 2}, seed); + printTestName("Running cube int2\n"); + failed |= run_test( + {128, 128}, {16, 16, 3}, seed); + printTestName("Running cube int4\n"); + failed |= run_test( + {64, 64}, {32, 16, 1}, seed); + + printTestName("Running cube unsigned int\n"); + failed |= run_test( + {15, 15}, {5, 3, 1}, seed); + printTestName("Running cube unsigned int2\n"); + failed |= run_test( + {90, 90}, {10, 9, 3}, seed); + printTestName("Running cube unsigned int4\n"); + failed |= run_test( + {1024, 1024}, {16, 16, 2}, seed); + + printTestName("Running cube short\n"); + failed |= run_test( + {8, 8}, {2, 2, 1}, seed); + printTestName("Running cube short2\n"); + failed |= run_test( + {8, 8}, {4, 4, 2}, seed); + printTestName("Running cube short4\n"); + failed |= run_test( + {8, 8}, {8, 8, 3}, seed); + + printTestName("Running cube unsigned short\n"); + failed |= + run_test( + {75, 75}, {25, 5, 1}, seed); + printTestName("Running cube unsigned short2\n"); + failed |= + run_test( + {75, 75}, {15, 3, 2}, seed); + printTestName("Running cube unsigned short4\n"); + failed |= + run_test( + {75, 75}, {5, 25, 3}, seed); + + printTestName("Running cube char\n"); + failed |= run_test( + {60, 60}, {10, 6, 1}, seed); + printTestName("Running cube char2\n"); + failed |= run_test( + {60, 60}, {5, 3, 2}, seed); + printTestName("Running cube char4\n"); + failed |= run_test( + {60, 60}, {6, 10, 3}, seed); + + printTestName("Running cube unsigned char\n"); + failed |= run_test( + {128, 128}, {16, 16, 3}, seed); + printTestName("Running cube unsigned char2\n"); + failed |= run_test( + {128, 128}, {16, 16, 3}, seed); + printTestName("Running cube unsigned char4\n"); + failed |= run_test( + {128, 128}, {16, 16, 3}, seed); + + printTestName("Running cube float\n"); + failed |= run_test( + {1024, 1024}, {16, 16, 1}, seed); + printTestName("Running cube float2\n"); + failed |= run_test( + {1024, 1024}, {16, 16, 3}, seed); + printTestName("Running cube float4\n"); + failed |= run_test( + {1024, 1024}, {16, 16, 2}, seed); + + printTestName("Running cube half\n"); + failed |= run_test( + {48, 48}, {8, 8, 1}, seed); + printTestName("Running cube half2\n"); + failed |= run_test( + {48, 48}, {8, 8, 3}, seed); + printTestName("Running cube half4\n"); + failed |= run_test( + {48, 48}, {8, 8, 2}, seed); + + if (failed) { + std::cerr << "An error has occured!\n"; + return 1; + } + + std::cout << "All tests passed!\n"; + return 0; +} diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 931332b928cb4..a0869a00bf348 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -97,6 +97,17 @@ int main() { << mipmapLevelReferenceSupport << "\n"; #endif + // Extension: query for bindless image cubemaps support -- aspects. + bool cubemapSupport = dev.has(sycl::aspect::ext_oneapi_cubemap); + bool cubemapSeamlessFilterSupport = + dev.has(sycl::aspect::ext_oneapi_cubemap_seamless_filtering); + +#ifdef VERBOSE_PRINT + std::cout << "cubemapSupport: " << cubemapSupport + << "\ncubemapSeamlessFilterSupport: " + << cubemapSeamlessFilterSupport << "\n"; +#endif + // Extension: query for bindless image interop support -- device aspects bool interopMemoryImportSupport = dev.has(sycl::aspect::ext_oneapi_interop_memory_import);