From 33e7a6c9d3f36c13c548278a240b4a2dd49346b1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 10 Apr 2024 07:58:50 -0700 Subject: [PATCH 1/6] Remove ref to bfloat16 math aspect. This aspect is completely unused in the latest repo and it's documentation leads to confusion. bfloat16 math functions are supported for all devices. Signed-off-by: JackAKirk --- ...xt_oneapi_bfloat16_math_functions.asciidoc | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc index 3261a94b17cdf..7b3f817fb22b6 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc @@ -67,6 +67,12 @@ The descriptions of the `fma`, `fmin`, `fmax`, `fabs`, `isnan`, `ceil`, `floor`, specification: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_math_functions. +[NOTE] +The bfloat16 type is supported on all devices. DPC++ currently supports some +bfloat16 type math functions natively on Intel Xe HP GPUs and Nvidia GPUs with +Compute Capability >= SM80. On other devices, and in host code, such functions +are emulated in software. + == Specification === Feature test macro @@ -86,21 +92,6 @@ supports. |1 |The APIs of this experimental extension are not versioned, so the feature-test macro always has this value. |=== -=== Extension to `enum class aspect` - -[source] ----- -namespace sycl { -enum class aspect { - ... - sycl_ext_oneapi_bfloat16_math_functions -} -} ----- - -If a SYCL device has the `sycl_ext_oneapi_bfloat16_math_functions` aspect, -then it supports the `bfloat16` math functions described in the next section. - === Math Functions The following functions are only available when `T` is `bfloat16` or From 348f2c85b571d2d170c28159b0c887be1b264d78 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 11 Apr 2024 06:37:16 -0700 Subject: [PATCH 2/6] Remove bfloat16_math pt1. Signed-off-by: JackAKirk --- .../array_of_matrices.ll | 129 +++++++++--------- .../joint_matrix_bfloat16.ll | 71 +++++----- .../joint_matrix_half.ll | 73 +++++----- .../array_of_matrices.ll | 129 +++++++++--------- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 3 +- .../Instrumentation/AddressSanitizer/spir.ll | 83 ++++++----- .../device-code-split/vtable.ll | 49 ++++--- .../multiple-filtered-outputs.ll | 93 +++++++------ sycl/doc/design/DeviceConfigFile.md | 1 - sycl/include/sycl/device_aspect_macros.hpp | 10 -- sycl/include/sycl/info/aspects.def | 1 - sycl/include/sycl/info/device_traits.def | 2 - sycl/source/detail/device_impl.cpp | 2 - sycl/source/detail/device_info.hpp | 25 ---- sycl/test-e2e/BFloat16/bfloat16_builtins.cpp | 1 - sycl/test-e2e/Basic/aspects.cpp | 3 - .../ballot_group_algorithms.cpp | 12 +- sycl/test/abi/sycl_symbols_linux.dump | 1 - sycl/test/abi/sycl_symbols_windows.dump | 1 - .../properties/properties_kernel.cpp | 14 +- .../properties_kernel_device_has.cpp | 7 +- .../properties_kernel_device_has_macro.cpp | 5 +- 22 files changed, 327 insertions(+), 388 deletions(-) diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll index 0571af5dd975c..eb744064c87df 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll @@ -369,68 +369,67 @@ attributes #5 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"ext_oneapi_bindless_images", i32 42} -!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!46 = !{!"ext_oneapi_interop_memory_import", i32 46} -!47 = !{!"ext_oneapi_interop_memory_export", i32 47} -!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!50 = !{!"ext_oneapi_mipmap", i32 50} -!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!53 = !{!"int64_base_atomics", i32 7} -!54 = !{!"int64_extended_atomics", i32 8} -!55 = !{!"usm_system_allocator", i32 17} -!56 = !{!"usm_restricted_shared_allocations", i32 16} -!57 = !{!"host", i32 0} -!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} -!59 = !{i32 242145} -!60 = !{i32 -1, i32 -1, i32 -1} -!61 = !{i32 16} -!62 = !{} -!63 = !{i1 false, i1 false, i1 false} -!64 = !{!65, !65, i64 0} -!65 = !{!"any pointer", !66, i64 0} -!66 = !{!"omnipotent char", !67, i64 0} -!67 = !{!"Simple C++ TBAA"} -!68 = !{!69, !71, !73} -!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!75 = !{!76, !78, !73} -!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!80 = distinct !{!80, !81} -!81 = !{!"llvm.loop.mustprogress"} -!82 = !{!83, !65, i64 0} -!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} -!84 = distinct !{!84, !81} -!85 = distinct !{!85, !81} -!86 = !{!87, !65, i64 0} -!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} -!88 = distinct !{!88, !81} -!89 = !{!90, !65, i64 0} -!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} -!91 = distinct !{!91, !81} -!92 = !{!93} -!93 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!95 = distinct !{!95, !81} -!96 = distinct !{!96, !81} -!97 = distinct !{!97, !81} -!98 = distinct !{!98, !81} -!99 = distinct !{!99, !81} +!35 = !{!"ext_intel_free_memory", i32 36} +!36 = !{!"ext_intel_device_id", i32 37} +!37 = !{!"ext_intel_memory_clock_rate", i32 38} +!38 = !{!"ext_intel_memory_bus_width", i32 39} +!39 = !{!"emulated", i32 40} +!40 = !{!"ext_intel_legacy_image", i32 41} +!41 = !{!"ext_oneapi_bindless_images", i32 42} +!42 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!43 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!44 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!45 = !{!"ext_oneapi_interop_memory_import", i32 46} +!46 = !{!"ext_oneapi_interop_memory_export", i32 47} +!47 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!48 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!49 = !{!"ext_oneapi_mipmap", i32 50} +!50 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!51 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!52 = !{!"int64_base_atomics", i32 7} +!53 = !{!"int64_extended_atomics", i32 8} +!54 = !{!"usm_system_allocator", i32 17} +!55 = !{!"usm_restricted_shared_allocations", i32 16} +!56 = !{!"host", i32 0} +!57 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!58 = !{i32 242145} +!59 = !{i32 -1, i32 -1, i32 -1} +!60 = !{i32 16} +!61 = !{} +!62 = !{i1 false, i1 false, i1 false} +!63 = !{!65, !65, i64 0} +!64 = !{!"any pointer", !66, i64 0} +!65 = !{!"omnipotent char", !67, i64 0} +!66 = !{!"Simple C++ TBAA"} +!67 = !{!69, !71, !73} +!68 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!69 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!70 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!71 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!72 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!73 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!74 = !{!76, !78, !73} +!75 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!76 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!77 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!78 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!79 = distinct !{!80, !81} +!80 = !{!"llvm.loop.mustprogress"} +!81 = !{!83, !65, i64 0} +!82 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!83 = distinct !{!84, !81} +!84 = distinct !{!85, !81} +!85 = !{!87, !65, i64 0} +!86 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!87 = distinct !{!88, !81} +!88 = !{!90, !65, i64 0} +!89 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!90 = distinct !{!91, !81} +!91 = !{!93} +!92 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!93 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!94 = distinct !{!95, !81} +!95 = distinct !{!96, !81} +!96 = distinct !{!97, !81} +!97 = distinct !{!98, !81} +!98 = distinct !{!99, !81} diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll index 80c014b6891f6..0188e283ad0d9 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll @@ -164,39 +164,38 @@ attributes #3 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"int64_base_atomics", i32 7} -!43 = !{!"int64_extended_atomics", i32 8} -!44 = !{!"usm_system_allocator", i32 17} -!45 = !{!"usm_restricted_shared_allocations", i32 16} -!46 = !{!"host", i32 0} -!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} -!48 = !{i32 10642943} -!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} -!51 = !{i32 16} -!52 = !{} -!53 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} -!54 = !{!55, !57, !59} -!55 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!56 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!57 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!58 = distinct !{!58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!59 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!60 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!61 = !{!62, !64, !59} -!62 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!63 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!64 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!65 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!66 = !{!67} -!67 = distinct !{!67, !68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!68 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!69 = distinct !{!69, !70} -!70 = !{!"llvm.loop.mustprogress"} +!35 = !{!"ext_intel_free_memory", i32 36} +!36 = !{!"ext_intel_device_id", i32 37} +!37 = !{!"ext_intel_memory_clock_rate", i32 38} +!38 = !{!"ext_intel_memory_bus_width", i32 39} +!39 = !{!"emulated", i32 40} +!40 = !{!"ext_intel_legacy_image", i32 41} +!41 = !{!"int64_base_atomics", i32 7} +!42 = !{!"int64_extended_atomics", i32 8} +!43 = !{!"usm_system_allocator", i32 17} +!44 = !{!"usm_restricted_shared_allocations", i32 16} +!45 = !{!"host", i32 0} +!46 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!47 = !{i32 10642943} +!48 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!49 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} +!50 = !{i32 16} +!51 = !{} +!52 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} +!53 = !{!55, !57, !59} +!54 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!55 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!56 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!57 = distinct !{!58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!58 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!59 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!60 = !{!62, !64, !59} +!61 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!62 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!63 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!64 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!65 = !{!67} +!66 = distinct !{!67, !68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!67 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!68 = distinct !{!69, !70} +!69 = !{!"llvm.loop.mustprogress"} diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll index 6b5c380de586a..5c7d70ffe14c3 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll @@ -171,40 +171,39 @@ attributes #3 = { convergent nounwind } !33 = !{!"host_debuggable", i32 32} !34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!37 = !{!"ext_intel_free_memory", i32 36} -!38 = !{!"ext_intel_device_id", i32 37} -!39 = !{!"ext_intel_memory_clock_rate", i32 38} -!40 = !{!"ext_intel_memory_bus_width", i32 39} -!41 = !{!"emulated", i32 40} -!42 = !{!"ext_intel_legacy_image", i32 41} -!43 = !{!"int64_base_atomics", i32 7} -!44 = !{!"int64_extended_atomics", i32 8} -!45 = !{!"usm_system_allocator", i32 17} -!46 = !{!"usm_restricted_shared_allocations", i32 16} -!47 = !{!"host", i32 0} -!48 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} -!49 = !{i32 10643216} -!50 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!51 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} -!52 = !{i32 16} -!53 = !{i32 5} -!54 = !{} -!55 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} -!56 = !{!57, !59, !61} -!57 = distinct !{!57, !58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!58 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!59 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!60 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!61 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!62 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!63 = !{!64, !66, !61} -!64 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!65 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!66 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!67 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!68 = !{!69} -!69 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} -!70 = distinct !{!70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} -!71 = distinct !{!71, !72} -!72 = !{!"llvm.loop.mustprogress"} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"int64_base_atomics", i32 7} +!43 = !{!"int64_extended_atomics", i32 8} +!44 = !{!"usm_system_allocator", i32 17} +!45 = !{!"usm_restricted_shared_allocations", i32 16} +!46 = !{!"host", i32 0} +!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!48 = !{i32 10643216} +!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} +!51 = !{i32 16} +!52 = !{i32 5} +!53 = !{} +!54 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} +!55 = !{!57, !59, !61} +!56 = distinct !{!57, !58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!57 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!58 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!59 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!60 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!61 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!62 = !{!64, !66, !61} +!63 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!64 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!65 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!66 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!67 = !{!69} +!68 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} +!69 = distinct !{!70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} +!70 = distinct !{!71, !72} +!71 = !{!"llvm.loop.mustprogress"} diff --git a/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll b/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll index 28979b4eb1a10..a685787f0e89d 100644 --- a/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll +++ b/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll @@ -370,68 +370,67 @@ attributes #5 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"ext_oneapi_bindless_images", i32 42} -!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!46 = !{!"ext_oneapi_interop_memory_import", i32 46} -!47 = !{!"ext_oneapi_interop_memory_export", i32 47} -!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!50 = !{!"ext_oneapi_mipmap", i32 50} -!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!53 = !{!"int64_base_atomics", i32 7} -!54 = !{!"int64_extended_atomics", i32 8} -!55 = !{!"usm_system_allocator", i32 17} -!56 = !{!"usm_restricted_shared_allocations", i32 16} -!57 = !{!"host", i32 0} -!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} -!59 = !{i32 242145} -!60 = !{i32 -1, i32 -1, i32 -1} -!61 = !{i32 16} -!62 = !{} -!63 = !{i1 false, i1 false, i1 false} -!64 = !{!65, !65, i64 0} -!65 = !{!"any pointer", !66, i64 0} -!66 = !{!"omnipotent char", !67, i64 0} -!67 = !{!"Simple C++ TBAA"} -!68 = !{!69, !71, !73} -!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!75 = !{!76, !78, !73} -!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!80 = distinct !{!80, !81} -!81 = !{!"llvm.loop.mustprogress"} -!82 = !{!83, !65, i64 0} -!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} -!84 = distinct !{!84, !81} -!85 = distinct !{!85, !81} -!86 = !{!87, !65, i64 0} -!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} -!88 = distinct !{!88, !81} -!89 = !{!90, !65, i64 0} -!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} -!91 = distinct !{!91, !81} -!92 = !{!93} -!93 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!95 = distinct !{!95, !81} -!96 = distinct !{!96, !81} -!97 = distinct !{!97, !81} -!98 = distinct !{!98, !81} -!99 = distinct !{!99, !81} +!35 = !{!"ext_intel_free_memory", i32 36} +!36 = !{!"ext_intel_device_id", i32 37} +!37 = !{!"ext_intel_memory_clock_rate", i32 38} +!38 = !{!"ext_intel_memory_bus_width", i32 39} +!39 = !{!"emulated", i32 40} +!40 = !{!"ext_intel_legacy_image", i32 41} +!41 = !{!"ext_oneapi_bindless_images", i32 42} +!42 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!43 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!44 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!45 = !{!"ext_oneapi_interop_memory_import", i32 46} +!46 = !{!"ext_oneapi_interop_memory_export", i32 47} +!47 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!48 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!49 = !{!"ext_oneapi_mipmap", i32 50} +!50 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!51 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!52 = !{!"int64_base_atomics", i32 7} +!53 = !{!"int64_extended_atomics", i32 8} +!54 = !{!"usm_system_allocator", i32 17} +!55 = !{!"usm_restricted_shared_allocations", i32 16} +!56 = !{!"host", i32 0} +!57 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!58 = !{i32 242145} +!59 = !{i32 -1, i32 -1, i32 -1} +!60 = !{i32 16} +!61 = !{} +!62 = !{i1 false, i1 false, i1 false} +!63 = !{!65, !65, i64 0} +!64 = !{!"any pointer", !66, i64 0} +!65 = !{!"omnipotent char", !67, i64 0} +!66 = !{!"Simple C++ TBAA"} +!67 = !{!69, !71, !73} +!68 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!69 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!70 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!71 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!72 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!73 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!74 = !{!76, !78, !73} +!75 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!76 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!77 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!78 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!79 = distinct !{!80, !81} +!80 = !{!"llvm.loop.mustprogress"} +!81 = !{!83, !65, i64 0} +!82 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!83 = distinct !{!84, !81} +!84 = distinct !{!85, !81} +!85 = !{!87, !65, i64 0} +!86 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!87 = distinct !{!88, !81} +!88 = !{!90, !65, i64 0} +!89 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!90 = distinct !{!91, !81} +!91 = !{!93} +!92 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!93 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!94 = distinct !{!95, !81} +!95 = distinct !{!96, !81} +!96 = distinct !{!97, !81} +!97 = distinct !{!98, !81} +!98 = distinct !{!99, !81} diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 2befb540e3daa..a65b76fa105f2 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -43,7 +43,6 @@ def AspectExt_oneapi_native_assert : Aspect<"ext_oneapi_native_assert">; def AspectHost_debuggable : Aspect<"host_debuggable">; def AspectExt_intel_gpu_hw_threads_per_eu : Aspect<"ext_intel_gpu_hw_threads_per_eu">; def AspectExt_oneapi_cuda_async_barrier : Aspect<"ext_oneapi_cuda_async_barrier">; -def AspectExt_oneapi_bfloat16_math_functions : Aspect<"ext_oneapi_bfloat16_math_functions">; def AspectExt_intel_free_memory : Aspect<"ext_intel_free_memory">; def AspectExt_intel_device_id : Aspect<"ext_intel_device_id">; def AspectExt_intel_memory_clock_rate : Aspect<"ext_intel_memory_clock_rate">; @@ -117,7 +116,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_intel_max_mem_bandwidth, AspectExt_intel_mem_channel, AspectUsm_atomic_host_allocations, AspectUsm_atomic_shared_allocations, AspectAtomic64, AspectExt_intel_device_info_uuid, AspectExt_oneapi_srgb, AspectExt_oneapi_native_assert, AspectHost_debuggable, AspectExt_intel_gpu_hw_threads_per_eu, - AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_bfloat16_math_functions, AspectExt_intel_free_memory, + AspectExt_oneapi_cuda_async_barrier, AspectExt_intel_free_memory, AspectExt_intel_device_id, AspectExt_intel_memory_clock_rate, AspectExt_intel_memory_bus_width, AspectEmulated, AspectExt_intel_legacy_image, AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm, diff --git a/llvm/test/Instrumentation/AddressSanitizer/spir.ll b/llvm/test/Instrumentation/AddressSanitizer/spir.ll index cee6a67605c5d..fb26cbf3415f1 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/spir.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/spir.ll @@ -112,45 +112,44 @@ attributes #5 = { convergent nounwind } !33 = !{!"host_debuggable", i32 32} !34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!37 = !{!"ext_intel_free_memory", i32 36} -!38 = !{!"ext_intel_device_id", i32 37} -!39 = !{!"ext_intel_memory_clock_rate", i32 38} -!40 = !{!"ext_intel_memory_bus_width", i32 39} -!41 = !{!"emulated", i32 40} -!42 = !{!"ext_intel_legacy_image", i32 41} -!43 = !{!"ext_oneapi_bindless_images", i32 42} -!44 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!45 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!46 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!47 = !{!"ext_oneapi_interop_memory_import", i32 46} -!48 = !{!"ext_oneapi_interop_memory_export", i32 47} -!49 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!50 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!51 = !{!"ext_oneapi_mipmap", i32 50} -!52 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!53 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!54 = !{!"ext_intel_esimd", i32 53} -!55 = !{!"ext_oneapi_ballot_group", i32 54} -!56 = !{!"ext_oneapi_fixed_size_group", i32 55} -!57 = !{!"ext_oneapi_opportunistic_group", i32 56} -!58 = !{!"ext_oneapi_tangle_group", i32 57} -!59 = !{!"int64_base_atomics", i32 7} -!60 = !{!"int64_extended_atomics", i32 8} -!61 = !{!"usm_system_allocator", i32 17} -!62 = !{!"usm_restricted_shared_allocations", i32 16} -!63 = !{!"host", i32 0} -!64 = !{!"clang version 18.0.0git (https://github.com/intel/llvm.git caecf6b928648a83c8ceb84988231cb246c4365e)"} -!65 = !{i32 419} -!66 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!67 = !{i1 false, i1 true, i1 false, i1 false, i1 false} -!68 = !{} -!69 = !{!70} -!70 = distinct !{!70, !71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_: %agg.result"} -!71 = distinct !{!71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_"} -!72 = !{!73, !73, i64 0} -!73 = !{!"long", !74, i64 0} -!74 = !{!"omnipotent char", !75, i64 0} -!75 = !{!"Simple C++ TBAA"} -!76 = !{!77, !77, i64 0} -!77 = !{!"int", !74, i64 0} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"ext_intel_esimd", i32 53} +!54 = !{!"ext_oneapi_ballot_group", i32 54} +!55 = !{!"ext_oneapi_fixed_size_group", i32 55} +!56 = !{!"ext_oneapi_opportunistic_group", i32 56} +!57 = !{!"ext_oneapi_tangle_group", i32 57} +!58 = !{!"int64_base_atomics", i32 7} +!59 = !{!"int64_extended_atomics", i32 8} +!60 = !{!"usm_system_allocator", i32 17} +!61 = !{!"usm_restricted_shared_allocations", i32 16} +!62 = !{!"host", i32 0} +!63 = !{!"clang version 18.0.0git (https://github.com/intel/llvm.git caecf6b928648a83c8ceb84988231cb246c4365e)"} +!64 = !{i32 419} +!65 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!66 = !{i1 false, i1 true, i1 false, i1 false, i1 false} +!67 = !{} +!68 = !{!70} +!69 = distinct !{!70, !71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_: %agg.result"} +!70 = distinct !{!71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_"} +!71 = !{!73, !73, i64 0} +!72 = !{!"long", !74, i64 0} +!73 = !{!"omnipotent char", !75, i64 0} +!74 = !{!"Simple C++ TBAA"} +!75 = !{!77, !77, i64 0} +!76 = !{!"int", !74, i64 0} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll index 02d289fa772e0..b27c184e41115 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll @@ -143,28 +143,27 @@ attributes #1 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trap !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"int64_base_atomics", i32 7} -!43 = !{!"int64_extended_atomics", i32 8} -!44 = !{!"usm_system_allocator", i32 17} -!45 = !{!"usm_restricted_shared_allocations", i32 16} -!46 = !{!"host", i32 0} -!47 = !{!"clang version 17.0.0 "} -!48 = !{i32 546} -!49 = !{i32 -1, i32 -1} -!50 = !{} -!51 = !{i1 false, i1 false} -!52 = !{!53, !53, i64 0} -!53 = !{!"vtable pointer", !54, i64 0} -!54 = !{!"Simple C++ TBAA"} -!55 = !{!56, !56, i64 0} -!56 = !{!"any pointer", !57, i64 0} -!57 = !{!"omnipotent char", !54, i64 0} -!58 = !{i32 193} -!59 = !{i32 273} +!35 = !{!"ext_intel_free_memory", i32 36} +!36 = !{!"ext_intel_device_id", i32 37} +!37 = !{!"ext_intel_memory_clock_rate", i32 38} +!38 = !{!"ext_intel_memory_bus_width", i32 39} +!39 = !{!"emulated", i32 40} +!40 = !{!"ext_intel_legacy_image", i32 41} +!41 = !{!"int64_base_atomics", i32 7} +!42 = !{!"int64_extended_atomics", i32 8} +!43 = !{!"usm_system_allocator", i32 17} +!44 = !{!"usm_restricted_shared_allocations", i32 16} +!45 = !{!"host", i32 0} +!46 = !{!"clang version 17.0.0 "} +!47 = !{i32 546} +!48 = !{i32 -1, i32 -1} +!49 = !{} +!50 = !{i1 false, i1 false} +!51 = !{!53, !53, i64 0} +!52 = !{!"vtable pointer", !54, i64 0} +!53 = !{!"Simple C++ TBAA"} +!54 = !{!56, !56, i64 0} +!55 = !{!"any pointer", !57, i64 0} +!56 = !{!"omnipotent char", !54, i64 0} +!57 = !{i32 193} +!58 = !{i32 273} diff --git a/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll index 1f014410d0a1c..054e478ee1423 100644 --- a/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll +++ b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll @@ -152,50 +152,49 @@ attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trap !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"ext_oneapi_bindless_images", i32 42} -!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!46 = !{!"ext_oneapi_interop_memory_import", i32 46} -!47 = !{!"ext_oneapi_interop_memory_export", i32 47} -!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!50 = !{!"ext_oneapi_mipmap", i32 50} -!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!53 = !{!"ext_intel_esimd", i32 53} -!54 = !{!"ext_oneapi_ballot_group", i32 54} -!55 = !{!"ext_oneapi_fixed_size_group", i32 55} -!56 = !{!"ext_oneapi_opportunistic_group", i32 56} -!57 = !{!"ext_oneapi_tangle_group", i32 57} -!58 = !{!"ext_intel_matrix", i32 58} -!59 = !{!"int64_base_atomics", i32 7} -!60 = !{!"int64_extended_atomics", i32 8} -!61 = !{!"usm_system_allocator", i32 17} -!62 = !{!"usm_restricted_shared_allocations", i32 16} -!63 = !{!"host", i32 0} -!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} -!65 = !{i32 233} -!66 = !{i32 -1} -!67 = !{i32 6} -!68 = !{} -!69 = !{i1 false} -!70 = !{!71, !71, i64 0} -!71 = !{!"double", !72, i64 0} -!72 = !{!"omnipotent char", !73, i64 0} -!73 = !{!"Simple C++ TBAA"} -!74 = !{i32 364} -!75 = !{!76, !76, i64 0} -!76 = !{!"float", !72, i64 0} -!77 = !{i32 529} -!78 = !{i32 8} -!79 = !{i32 16} -!80 = !{i32 32} -!81 = !{i32 64} +!35 = !{!"ext_intel_free_memory", i32 36} +!36 = !{!"ext_intel_device_id", i32 37} +!37 = !{!"ext_intel_memory_clock_rate", i32 38} +!38 = !{!"ext_intel_memory_bus_width", i32 39} +!39 = !{!"emulated", i32 40} +!40 = !{!"ext_intel_legacy_image", i32 41} +!41 = !{!"ext_oneapi_bindless_images", i32 42} +!42 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!43 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!44 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!45 = !{!"ext_oneapi_interop_memory_import", i32 46} +!46 = !{!"ext_oneapi_interop_memory_export", i32 47} +!47 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!48 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!49 = !{!"ext_oneapi_mipmap", i32 50} +!50 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!51 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!52 = !{!"ext_intel_esimd", i32 53} +!53 = !{!"ext_oneapi_ballot_group", i32 54} +!54 = !{!"ext_oneapi_fixed_size_group", i32 55} +!55 = !{!"ext_oneapi_opportunistic_group", i32 56} +!56 = !{!"ext_oneapi_tangle_group", i32 57} +!57 = !{!"ext_intel_matrix", i32 58} +!58 = !{!"int64_base_atomics", i32 7} +!59 = !{!"int64_extended_atomics", i32 8} +!60 = !{!"usm_system_allocator", i32 17} +!61 = !{!"usm_restricted_shared_allocations", i32 16} +!62 = !{!"host", i32 0} +!63 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} +!64 = !{i32 233} +!65 = !{i32 -1} +!66 = !{i32 6} +!67 = !{} +!68 = !{i1 false} +!69 = !{!71, !71, i64 0} +!70 = !{!"double", !72, i64 0} +!71 = !{!"omnipotent char", !73, i64 0} +!72 = !{!"Simple C++ TBAA"} +!73 = !{i32 364} +!74 = !{!76, !76, i64 0} +!75 = !{!"float", !72, i64 0} +!76 = !{i32 529} +!77 = !{i32 8} +!78 = !{i32 16} +!79 = !{i32 32} +!80 = !{i32 64} diff --git a/sycl/doc/design/DeviceConfigFile.md b/sycl/doc/design/DeviceConfigFile.md index ba9c0cebbfe4a..505c65226a809 100644 --- a/sycl/doc/design/DeviceConfigFile.md +++ b/sycl/doc/design/DeviceConfigFile.md @@ -176,7 +176,6 @@ def AspectExt_oneapi_native_assert : Aspect<"ext_oneapi_native_assert">; def AspectHost_debuggable : Aspect<"host_debuggable">; def AspectExt_intel_gpu_hw_threads_per_eu : Aspect<"ext_intel_gpu_hw_threads_per_eu">; def AspectExt_oneapi_cuda_async_barrier : Aspect<"ext_oneapi_cuda_async_barrier">; -def AspectExt_oneapi_bfloat16_math_functions : Aspect<"ext_oneapi_bfloat16_math_functions">; def AspectExt_intel_free_memory : Aspect<"ext_intel_free_memory">; def AspectExt_intel_device_id : Aspect<"ext_intel_device_id">; def AspectExt_intel_memory_clock_rate : Aspect<"ext_intel_memory_clock_rate">; diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index bfdee3e080b54..834d8bcc1e032 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -183,11 +183,6 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cuda_async_barrier__ 0 #endif -#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bfloat16_math_functions__ -// __SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35) -#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bfloat16_math_functions__ 0 -#endif - #ifndef __SYCL_ALL_DEVICES_HAVE_ext_intel_free_memory__ // __SYCL_ASPECT(ext_intel_free_memory, 36) #define __SYCL_ALL_DEVICES_HAVE_ext_intel_free_memory__ 0 @@ -518,11 +513,6 @@ #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_cuda_async_barrier__ 0 #endif -#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_bfloat16_math_functions__ -// __SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35) -#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bfloat16_math_functions__ 0 -#endif - #ifndef __SYCL_ANY_DEVICE_HAS_ext_intel_free_memory__ // __SYCL_ASPECT(ext_intel_free_memory, 36) #define __SYCL_ANY_DEVICE_HAS_ext_intel_free_memory__ 0 diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 187a6627152e0..8fc6f582d8ce1 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -29,7 +29,6 @@ __SYCL_ASPECT(ext_oneapi_native_assert, 31) __SYCL_ASPECT(host_debuggable, 32) __SYCL_ASPECT(ext_intel_gpu_hw_threads_per_eu, 33) __SYCL_ASPECT(ext_oneapi_cuda_async_barrier, 34) -__SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35) __SYCL_ASPECT(ext_intel_free_memory, 36) __SYCL_ASPECT(ext_intel_device_id, 37) __SYCL_ASPECT(ext_intel_memory_clock_rate, 38) diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 170f6d42c6177..a14b81d6d0db4 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -202,8 +202,6 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool, PI_DEVICE_INFO_IMAGE_SRGB) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool, PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT) -__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16_math_functions, bool, - PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS) //Deprecated oneapi/intel extension //TODO:Remove when possible diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index d506821818953..a593b9849446f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -372,8 +372,6 @@ bool device_impl::has(aspect Aspect) const { return has_extension("cl_khr_fp16"); case aspect::fp64: return has_extension("cl_khr_fp64"); - case aspect::ext_oneapi_bfloat16_math_functions: - return get_info(); case aspect::int64_base_atomics: return has_extension("cl_khr_int64_base_atomics"); case aspect::int64_extended_atomics: diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index a56281b862ef3..455fc6a8e6a10 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -299,25 +299,6 @@ struct get_device_info_impl, } }; -// Specialization for bf16 math functions -template <> -struct get_device_info_impl { - static bool get(const DeviceImplPtr &Dev) { - bool result = false; - - sycl::detail::pi::PiResult Err = - Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(result), &result, nullptr); - if (Err != PI_SUCCESS) { - return false; - } - return result; - } -}; - // Specialization for exec_capabilities, OpenCL returns a bitfield template <> struct get_device_info_impl, @@ -1538,12 +1519,6 @@ get_device_info_host() { memory_scope::work_group, memory_scope::device, memory_scope::system}; } -template <> -inline bool -get_device_info_host() { - return false; -} - template <> inline uint32_t get_device_info_host() { // current value is the required minimum diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index e1cf00d5e887f..1462b0e987e3f 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -5,7 +5,6 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Basic/aspects.cpp b/sycl/test-e2e/Basic/aspects.cpp index 75daf263e638e..bd47d1b4bf1cb 100644 --- a/sycl/test-e2e/Basic/aspects.cpp +++ b/sycl/test-e2e/Basic/aspects.cpp @@ -51,9 +51,6 @@ int main() { if (plt.has(aspect::fp64)) { std::cout << " fp64" << std::endl; } - if (plt.has(aspect::ext_oneapi_bfloat16_math_functions)) { - std::cout << " ext_oneapi_bfloat16_math_functions" << std::endl; - } if (plt.has(aspect::atomic64)) { std::cout << " atomic64" << std::endl; } diff --git a/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp b/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp index ef495e36dadd2..2407719fb0f33 100644 --- a/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp +++ b/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp @@ -62,7 +62,7 @@ int main() { Visible += TmpAcc[Other]; } } - BarrierAcc[WI] = (Visible == BallotGroup.get_local_linear_range()); + /* BarrierAcc[WI] = (Visible == BallotGroup.get_local_linear_range()); // Simple check of group algorithms. uint32_t OriginalLID = SG.get_local_linear_id(); @@ -92,12 +92,12 @@ int main() { } else { NoneAcc[WI] = (NoneResult == true); } +*/ + double ReduceResult = + sycl::reduce_over_group(BallotGroup, double(0.5), sycl::plus<>()); + ReduceAcc[WI] = (ReduceResult == BallotGroupSize/2); - uint32_t ReduceResult = - sycl::reduce_over_group(BallotGroup, 1, sycl::plus<>()); - ReduceAcc[WI] = (ReduceResult == BallotGroupSize); - - uint32_t ExScanResult = + /* uint32_t ExScanResult = sycl::exclusive_scan_over_group(BallotGroup, 1, sycl::plus<>()); ExScanAcc[WI] = (ExScanResult == LID); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 28944dad6adba..ca4c172876620 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4061,7 +4061,6 @@ _ZNK4sycl3_V16device13get_info_implINS0_4info6device32atomic_memory_scope_capabi _ZNK4sycl3_V16device13get_info_implINS0_4info6device33ext_intel_gpu_subslices_per_sliceEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device33ext_oneapi_max_global_work_groupsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device33usm_restricted_shared_allocationsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_4info6device34ext_oneapi_bfloat16_math_functionsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device35ext_intel_gpu_eu_count_per_subsliceEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device38sub_group_independent_forward_progressEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device4nameEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 5dc49a037a721..d0e96fb33cf03 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -127,7 +127,6 @@ ??$get_info_impl@Uext_intel_max_mem_bandwidth@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Uext_intel_mem_channel@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ ??$get_info_impl@Uext_intel_pci_address@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ -??$get_info_impl@Uext_oneapi_bfloat16_math_functions@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ ??$get_info_impl@Uext_oneapi_max_global_work_groups@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Uext_oneapi_max_work_groups_1d@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$id@$00@12@XZ ??$get_info_impl@Uext_oneapi_max_work_groups_2d@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$id@$01@12@XZ diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 65717c2200628..3868c23f7535c 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -24,7 +24,6 @@ using device_has_all = aspect::ext_oneapi_native_assert, aspect::host_debuggable, aspect::ext_intel_gpu_hw_threads_per_eu, aspect::ext_oneapi_cuda_async_barrier, - aspect::ext_oneapi_bfloat16_math_functions, aspect::ext_intel_free_memory, aspect::ext_intel_device_id, aspect::ext_intel_memory_clock_rate, aspect::ext_intel_memory_bus_width>); @@ -116,7 +115,6 @@ int main() { singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); - singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); @@ -128,7 +126,7 @@ int main() { static_assert(is_property_value::value); static_assert(std::is_same_v); - static_assert(device_has_all::value.size() == 36); + static_assert(device_has_all::value.size() == 35); static_assert(device_has_all::value[0] == aspect::cpu); static_assert(device_has_all::value[1] == aspect::gpu); static_assert(device_has_all::value[2] == aspect::accelerator); @@ -169,13 +167,11 @@ int main() { aspect::ext_intel_gpu_hw_threads_per_eu); static_assert(device_has_all::value[30] == aspect::ext_oneapi_cuda_async_barrier); - static_assert(device_has_all::value[31] == - aspect::ext_oneapi_bfloat16_math_functions); - static_assert(device_has_all::value[32] == aspect::ext_intel_free_memory); - static_assert(device_has_all::value[33] == aspect::ext_intel_device_id); - static_assert(device_has_all::value[34] == + static_assert(device_has_all::value[31] == aspect::ext_intel_free_memory); + static_assert(device_has_all::value[32] == aspect::ext_intel_device_id); + static_assert(device_has_all::value[33] == aspect::ext_intel_memory_clock_rate); - static_assert(device_has_all::value[35] == + static_assert(device_has_all::value[34] == aspect::ext_intel_memory_bus_width); return 0; } diff --git a/sycl/test/extensions/properties/properties_kernel_device_has.cpp b/sycl/test/extensions/properties/properties_kernel_device_has.cpp index aa1988837d8c4..c8d377de08958 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has.cpp @@ -9,7 +9,7 @@ using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< aspect::ext_oneapi_cuda_async_barrier, - aspect::ext_oneapi_bfloat16_math_functions, aspect::custom, aspect::fp16, + aspect::custom, aspect::fp16, aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, aspect::usm_device_allocations, aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::cpu, @@ -131,7 +131,6 @@ int main() { } // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_cuda_async_barrier", i32 [[ext_oneapi_cuda_async_barrier_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16_math_functions", i32 [[ext_oneapi_bfloat16_math_functions_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"custom", i32 [[custom_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp16", i32 [[fp16_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp64", i32 [[fp64_ASPECT_MD:[0-9]+]]} @@ -165,5 +164,5 @@ int main() { // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_free_memory", i32 [[ext_intel_free_memory_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_device_id", i32 [[ext_intel_device_id_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" -// CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" diff --git a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp index 3664e8f794ab4..9718cf927d7fd 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp @@ -9,7 +9,7 @@ using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< aspect::ext_oneapi_cuda_async_barrier, - aspect::ext_oneapi_bfloat16_math_functions, aspect::custom, aspect::fp16, + aspect::custom, aspect::fp16, aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, aspect::usm_device_allocations, aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::cpu, @@ -47,7 +47,6 @@ int main() { } // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_cuda_async_barrier", i32 [[ext_oneapi_cuda_async_barrier_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16_math_functions", i32 [[ext_oneapi_bfloat16_math_functions_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"custom", i32 [[custom_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp16", i32 [[fp16_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp64", i32 [[fp64_ASPECT_MD:[0-9]+]]} @@ -83,6 +82,6 @@ int main() { // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_memory_clock_rate", i32 [[ext_intel_memory_clock_rate_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_memory_bus_width", i32 [[ext_intel_memory_bus_width_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]],[[ext_intel_memory_clock_rate_ASPECT_MD]],[[ext_intel_memory_bus_width_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]],[[ext_intel_memory_clock_rate_ASPECT_MD]],[[ext_intel_memory_bus_width_ASPECT_MD]]" // CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has" {{.*}} // CHECK-IR-DAG: attributes #[[DHAttr3]] = { {{.*}}"sycl-device-has"="[[fp16_ASPECT_MD]],[[atomic64_ASPECT_MD]]" From c7d2e7401ea475348b6bf3c2322afb9f57d3bfd9 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 11 Apr 2024 06:47:27 -0700 Subject: [PATCH 3/6] Fix format. Remove mistaken change. Signed-off-by: JackAKirk --- .../NonUniformGroups/ballot_group_algorithms.cpp | 12 ++++++------ .../properties/properties_kernel_device_has.cpp | 3 +-- .../properties_kernel_device_has_macro.cpp | 3 +-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp b/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp index 2407719fb0f33..ef495e36dadd2 100644 --- a/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp +++ b/sycl/test-e2e/NonUniformGroups/ballot_group_algorithms.cpp @@ -62,7 +62,7 @@ int main() { Visible += TmpAcc[Other]; } } - /* BarrierAcc[WI] = (Visible == BallotGroup.get_local_linear_range()); + BarrierAcc[WI] = (Visible == BallotGroup.get_local_linear_range()); // Simple check of group algorithms. uint32_t OriginalLID = SG.get_local_linear_id(); @@ -92,12 +92,12 @@ int main() { } else { NoneAcc[WI] = (NoneResult == true); } -*/ - double ReduceResult = - sycl::reduce_over_group(BallotGroup, double(0.5), sycl::plus<>()); - ReduceAcc[WI] = (ReduceResult == BallotGroupSize/2); - /* uint32_t ExScanResult = + uint32_t ReduceResult = + sycl::reduce_over_group(BallotGroup, 1, sycl::plus<>()); + ReduceAcc[WI] = (ReduceResult == BallotGroupSize); + + uint32_t ExScanResult = sycl::exclusive_scan_over_group(BallotGroup, 1, sycl::plus<>()); ExScanAcc[WI] = (ExScanResult == LID); diff --git a/sycl/test/extensions/properties/properties_kernel_device_has.cpp b/sycl/test/extensions/properties/properties_kernel_device_has.cpp index c8d377de08958..055b25b920b8b 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has.cpp @@ -8,8 +8,7 @@ using namespace sycl; using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< - aspect::ext_oneapi_cuda_async_barrier, - aspect::custom, aspect::fp16, + aspect::ext_oneapi_cuda_async_barrier, aspect::custom, aspect::fp16, aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, aspect::usm_device_allocations, aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::cpu, diff --git a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp index 9718cf927d7fd..3b00f67e08de4 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp @@ -8,8 +8,7 @@ using namespace sycl; using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< - aspect::ext_oneapi_cuda_async_barrier, - aspect::custom, aspect::fp16, + aspect::ext_oneapi_cuda_async_barrier, aspect::custom, aspect::fp16, aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, aspect::usm_device_allocations, aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::cpu, From 3cf019190603e049e6aa5ca25a79559fc864e2f2 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 12 Apr 2024 07:32:52 -0700 Subject: [PATCH 4/6] Check that conversions give same error. --- sycl/test-e2e/BFloat16/bfloat16_conversions.cpp | 1 - sycl/test-e2e/BFloat16/bfloat16_type.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp index 907faf0b5292a..0929d0d4c84ee 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp @@ -6,7 +6,6 @@ // UNSUPPORTED: accelerator // FIXME: enable opaque pointers support on CPU. -// UNSUPPORTED: cpu //==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==// // diff --git a/sycl/test-e2e/BFloat16/bfloat16_type.cpp b/sycl/test-e2e/BFloat16/bfloat16_type.cpp index ccd04b5c54188..73936eb5dc4e7 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_type.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_type.cpp @@ -7,7 +7,6 @@ // UNSUPPORTED: accelerator // FIXME: enable opaque pointers support on CPU. -// UNSUPPORTED: cpu //==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==// // From 8afcaa8b47925b5e62bd3b2aca7170f5ae3a0ccf Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 2 Jul 2024 10:11:15 +0100 Subject: [PATCH 5/6] Remove aspect from deviceConfigfile. Signed-off-by: JackAKirk --- llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index faea9b3391166..399a3ebe7704e 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -196,17 +196,17 @@ def : CudaTargetInfo<"nvidia_gpu_sm_70", !listconcat(CudaMinAspects, CudaBindles def : CudaTargetInfo<"nvidia_gpu_sm_72", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>; def : CudaTargetInfo<"nvidia_gpu_sm_75", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, [AspectFp16, AspectAtomic64])>; def : CudaTargetInfo<"nvidia_gpu_sm_80", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; def : CudaTargetInfo<"nvidia_gpu_sm_86", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; def : CudaTargetInfo<"nvidia_gpu_sm_90a", !listconcat(CudaMinAspects, CudaBindlessImagesAspects, - [AspectFp16, AspectAtomic64, AspectExt_oneapi_bfloat16_math_functions, AspectExt_oneapi_cuda_async_barrier])>; + [AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>; // // HIP / AMDGPU device aspects From 4f27e0b816da70b89edc1de467150e2833bdd833 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 2 Jul 2024 11:49:42 +0100 Subject: [PATCH 6/6] Don't change spirv tests. Signed-off-by: JackAKirk --- .../array_of_matrices.ll | 129 +++++++++--------- .../joint_matrix_bfloat16.ll | 71 +++++----- .../joint_matrix_half.ll | 73 +++++----- .../array_of_matrices.ll | 129 +++++++++--------- .../device-code-split/vtable.ll | 49 +++---- 5 files changed, 228 insertions(+), 223 deletions(-) diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll index eb744064c87df..0571af5dd975c 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll @@ -369,67 +369,68 @@ attributes #5 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_intel_free_memory", i32 36} -!36 = !{!"ext_intel_device_id", i32 37} -!37 = !{!"ext_intel_memory_clock_rate", i32 38} -!38 = !{!"ext_intel_memory_bus_width", i32 39} -!39 = !{!"emulated", i32 40} -!40 = !{!"ext_intel_legacy_image", i32 41} -!41 = !{!"ext_oneapi_bindless_images", i32 42} -!42 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!43 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!44 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!45 = !{!"ext_oneapi_interop_memory_import", i32 46} -!46 = !{!"ext_oneapi_interop_memory_export", i32 47} -!47 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!48 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!49 = !{!"ext_oneapi_mipmap", i32 50} -!50 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!51 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!52 = !{!"int64_base_atomics", i32 7} -!53 = !{!"int64_extended_atomics", i32 8} -!54 = !{!"usm_system_allocator", i32 17} -!55 = !{!"usm_restricted_shared_allocations", i32 16} -!56 = !{!"host", i32 0} -!57 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} -!58 = !{i32 242145} -!59 = !{i32 -1, i32 -1, i32 -1} -!60 = !{i32 16} -!61 = !{} -!62 = !{i1 false, i1 false, i1 false} -!63 = !{!65, !65, i64 0} -!64 = !{!"any pointer", !66, i64 0} -!65 = !{!"omnipotent char", !67, i64 0} -!66 = !{!"Simple C++ TBAA"} -!67 = !{!69, !71, !73} -!68 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!69 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!70 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!71 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!72 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!73 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!74 = !{!76, !78, !73} -!75 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!76 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!77 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!78 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!79 = distinct !{!80, !81} -!80 = !{!"llvm.loop.mustprogress"} -!81 = !{!83, !65, i64 0} -!82 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} -!83 = distinct !{!84, !81} -!84 = distinct !{!85, !81} -!85 = !{!87, !65, i64 0} -!86 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} -!87 = distinct !{!88, !81} -!88 = !{!90, !65, i64 0} -!89 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} -!90 = distinct !{!91, !81} -!91 = !{!93} -!92 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!93 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!94 = distinct !{!95, !81} -!95 = distinct !{!96, !81} -!96 = distinct !{!97, !81} -!97 = distinct !{!98, !81} -!98 = distinct !{!99, !81} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll index 0188e283ad0d9..80c014b6891f6 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll @@ -164,38 +164,39 @@ attributes #3 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_intel_free_memory", i32 36} -!36 = !{!"ext_intel_device_id", i32 37} -!37 = !{!"ext_intel_memory_clock_rate", i32 38} -!38 = !{!"ext_intel_memory_bus_width", i32 39} -!39 = !{!"emulated", i32 40} -!40 = !{!"ext_intel_legacy_image", i32 41} -!41 = !{!"int64_base_atomics", i32 7} -!42 = !{!"int64_extended_atomics", i32 8} -!43 = !{!"usm_system_allocator", i32 17} -!44 = !{!"usm_restricted_shared_allocations", i32 16} -!45 = !{!"host", i32 0} -!46 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} -!47 = !{i32 10642943} -!48 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!49 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} -!50 = !{i32 16} -!51 = !{} -!52 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} -!53 = !{!55, !57, !59} -!54 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!55 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!56 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!57 = distinct !{!58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!58 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!59 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!60 = !{!62, !64, !59} -!61 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!62 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!63 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!64 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!65 = !{!67} -!66 = distinct !{!67, !68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!67 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!68 = distinct !{!69, !70} -!69 = !{!"llvm.loop.mustprogress"} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"int64_base_atomics", i32 7} +!43 = !{!"int64_extended_atomics", i32 8} +!44 = !{!"usm_system_allocator", i32 17} +!45 = !{!"usm_restricted_shared_allocations", i32 16} +!46 = !{!"host", i32 0} +!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!48 = !{i32 10642943} +!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} +!51 = !{i32 16} +!52 = !{} +!53 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} +!54 = !{!55, !57, !59} +!55 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!56 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!57 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!58 = distinct !{!58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!59 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!60 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!61 = !{!62, !64, !59} +!62 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!63 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!64 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!66 = !{!67} +!67 = distinct !{!67, !68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!68 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!69 = distinct !{!69, !70} +!70 = !{!"llvm.loop.mustprogress"} diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll index 5c7d70ffe14c3..6b5c380de586a 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll @@ -171,39 +171,40 @@ attributes #3 = { convergent nounwind } !33 = !{!"host_debuggable", i32 32} !34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!36 = !{!"ext_intel_free_memory", i32 36} -!37 = !{!"ext_intel_device_id", i32 37} -!38 = !{!"ext_intel_memory_clock_rate", i32 38} -!39 = !{!"ext_intel_memory_bus_width", i32 39} -!40 = !{!"emulated", i32 40} -!41 = !{!"ext_intel_legacy_image", i32 41} -!42 = !{!"int64_base_atomics", i32 7} -!43 = !{!"int64_extended_atomics", i32 8} -!44 = !{!"usm_system_allocator", i32 17} -!45 = !{!"usm_restricted_shared_allocations", i32 16} -!46 = !{!"host", i32 0} -!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} -!48 = !{i32 10643216} -!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} -!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} -!51 = !{i32 16} -!52 = !{i32 5} -!53 = !{} -!54 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} -!55 = !{!57, !59, !61} -!56 = distinct !{!57, !58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!57 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!58 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!59 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!60 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!61 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!62 = !{!64, !66, !61} -!63 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!64 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!65 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!66 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!67 = !{!69} -!68 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} -!69 = distinct !{!70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} -!70 = distinct !{!71, !72} -!71 = !{!"llvm.loop.mustprogress"} +!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!37 = !{!"ext_intel_free_memory", i32 36} +!38 = !{!"ext_intel_device_id", i32 37} +!39 = !{!"ext_intel_memory_clock_rate", i32 38} +!40 = !{!"ext_intel_memory_bus_width", i32 39} +!41 = !{!"emulated", i32 40} +!42 = !{!"ext_intel_legacy_image", i32 41} +!43 = !{!"int64_base_atomics", i32 7} +!44 = !{!"int64_extended_atomics", i32 8} +!45 = !{!"usm_system_allocator", i32 17} +!46 = !{!"usm_restricted_shared_allocations", i32 16} +!47 = !{!"host", i32 0} +!48 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!49 = !{i32 10643216} +!50 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!51 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} +!52 = !{i32 16} +!53 = !{i32 5} +!54 = !{} +!55 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} +!56 = !{!57, !59, !61} +!57 = distinct !{!57, !58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!58 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!59 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!60 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!61 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!62 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!63 = !{!64, !66, !61} +!64 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!66 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!67 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!68 = !{!69} +!69 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} +!70 = distinct !{!70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} +!71 = distinct !{!71, !72} +!72 = !{!"llvm.loop.mustprogress"} diff --git a/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll b/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll index a685787f0e89d..28979b4eb1a10 100644 --- a/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll +++ b/llvm-spirv/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll @@ -370,67 +370,68 @@ attributes #5 = { convergent nounwind } !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_intel_free_memory", i32 36} -!36 = !{!"ext_intel_device_id", i32 37} -!37 = !{!"ext_intel_memory_clock_rate", i32 38} -!38 = !{!"ext_intel_memory_bus_width", i32 39} -!39 = !{!"emulated", i32 40} -!40 = !{!"ext_intel_legacy_image", i32 41} -!41 = !{!"ext_oneapi_bindless_images", i32 42} -!42 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} -!43 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} -!44 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} -!45 = !{!"ext_oneapi_interop_memory_import", i32 46} -!46 = !{!"ext_oneapi_interop_memory_export", i32 47} -!47 = !{!"ext_oneapi_interop_semaphore_import", i32 48} -!48 = !{!"ext_oneapi_interop_semaphore_export", i32 49} -!49 = !{!"ext_oneapi_mipmap", i32 50} -!50 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} -!51 = !{!"ext_oneapi_mipmap_level_reference", i32 52} -!52 = !{!"int64_base_atomics", i32 7} -!53 = !{!"int64_extended_atomics", i32 8} -!54 = !{!"usm_system_allocator", i32 17} -!55 = !{!"usm_restricted_shared_allocations", i32 16} -!56 = !{!"host", i32 0} -!57 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} -!58 = !{i32 242145} -!59 = !{i32 -1, i32 -1, i32 -1} -!60 = !{i32 16} -!61 = !{} -!62 = !{i1 false, i1 false, i1 false} -!63 = !{!65, !65, i64 0} -!64 = !{!"any pointer", !66, i64 0} -!65 = !{!"omnipotent char", !67, i64 0} -!66 = !{!"Simple C++ TBAA"} -!67 = !{!69, !71, !73} -!68 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!69 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!70 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!71 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!72 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!73 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!74 = !{!76, !78, !73} -!75 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} -!76 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} -!77 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} -!78 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} -!79 = distinct !{!80, !81} -!80 = !{!"llvm.loop.mustprogress"} -!81 = !{!83, !65, i64 0} -!82 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} -!83 = distinct !{!84, !81} -!84 = distinct !{!85, !81} -!85 = !{!87, !65, i64 0} -!86 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} -!87 = distinct !{!88, !81} -!88 = !{!90, !65, i64 0} -!89 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} -!90 = distinct !{!91, !81} -!91 = !{!93} -!92 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} -!93 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} -!94 = distinct !{!95, !81} -!95 = distinct !{!96, !81} -!96 = distinct !{!97, !81} -!97 = distinct !{!98, !81} -!98 = distinct !{!99, !81} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll index 86a2ccafaceb1..6d932aba577f9 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll @@ -146,27 +146,28 @@ attributes #1 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trap !32 = !{!"host_debuggable", i32 32} !33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} !34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} -!35 = !{!"ext_intel_free_memory", i32 36} -!36 = !{!"ext_intel_device_id", i32 37} -!37 = !{!"ext_intel_memory_clock_rate", i32 38} -!38 = !{!"ext_intel_memory_bus_width", i32 39} -!39 = !{!"emulated", i32 40} -!40 = !{!"ext_intel_legacy_image", i32 41} -!41 = !{!"int64_base_atomics", i32 7} -!42 = !{!"int64_extended_atomics", i32 8} -!43 = !{!"usm_system_allocator", i32 17} -!44 = !{!"usm_restricted_shared_allocations", i32 16} -!45 = !{!"host", i32 0} -!46 = !{!"clang version 17.0.0 "} -!47 = !{i32 546} -!48 = !{i32 -1, i32 -1} -!49 = !{} -!50 = !{i1 false, i1 false} -!51 = !{!53, !53, i64 0} -!52 = !{!"vtable pointer", !54, i64 0} -!53 = !{!"Simple C++ TBAA"} -!54 = !{!56, !56, i64 0} -!55 = !{!"any pointer", !57, i64 0} -!56 = !{!"omnipotent char", !54, i64 0} -!57 = !{i32 193} -!58 = !{i32 273} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"int64_base_atomics", i32 7} +!43 = !{!"int64_extended_atomics", i32 8} +!44 = !{!"usm_system_allocator", i32 17} +!45 = !{!"usm_restricted_shared_allocations", i32 16} +!46 = !{!"host", i32 0} +!47 = !{!"clang version 17.0.0 "} +!48 = !{i32 546} +!49 = !{i32 -1, i32 -1} +!50 = !{} +!51 = !{i1 false, i1 false} +!52 = !{!53, !53, i64 0} +!53 = !{!"vtable pointer", !54, i64 0} +!54 = !{!"Simple C++ TBAA"} +!55 = !{!56, !56, i64 0} +!56 = !{!"any pointer", !57, i64 0} +!57 = !{!"omnipotent char", !54, i64 0} +!58 = !{i32 193} +!59 = !{i32 273}