From 984c88cfcccd0d13dc71fd5514d679e45ce70124 Mon Sep 17 00:00:00 2001 From: fineg74 <61437305+fineg74@users.noreply.github.com> Date: Wed, 6 Mar 2024 12:19:09 -0800 Subject: [PATCH] [SYCL][ESIMD] Change default cache hints for prefetch (#12931) This patch requires the user to specify both L1 and L2 cache hints when calling the prefetch() function, similar to the old lsc_prefetch() API. --- sycl/include/sycl/ext/intel/esimd/memory.hpp | 16 +- .../unified_memory_api/Inputs/prefetch.hpp | 450 ++++++------------ sycl/test/esimd/memory_properties.cpp | 79 +-- 3 files changed, 167 insertions(+), 378 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index 21c7b0d8c43de..e72d39fbb2b65 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -8595,10 +8595,10 @@ prefetch(const T *p, simd byte_offsets, simd_mask mask, constexpr auto L1Hint = detail::getPropertyValue( - cache_hint::uncached); + cache_hint::none); constexpr auto L2Hint = detail::getPropertyValue( - cache_hint::cached); + cache_hint::none); detail::prefetch_impl(p, byte_offsets, mask); } @@ -8769,10 +8769,10 @@ prefetch(const T *p, OffsetT byte_offset, simd_mask<1> mask, PropertyListT props = {}) { constexpr auto L1Hint = detail::getPropertyValue( - cache_hint::uncached); + cache_hint::none); constexpr auto L2Hint = detail::getPropertyValue( - cache_hint::cached); + cache_hint::none); detail::prefetch_impl(p, byte_offset, mask); } @@ -8931,10 +8931,10 @@ prefetch(AccessorT acc, simd byte_offsets, constexpr auto L1Hint = detail::getPropertyValue( - cache_hint::uncached); + cache_hint::none); constexpr auto L2Hint = detail::getPropertyValue( - cache_hint::cached); + cache_hint::none); detail::prefetch_impl(acc, byte_offsets, mask); #endif // __ESIMD_FORCE_STATELESS_MEM @@ -9130,10 +9130,10 @@ prefetch(AccessorT acc, OffsetT byte_offset, simd_mask<1> mask, #else constexpr auto 
L1Hint = detail::getPropertyValue( - cache_hint::uncached); + cache_hint::none); constexpr auto L2Hint = detail::getPropertyValue( - cache_hint::cached); + cache_hint::none); detail::prefetch_impl(acc, byte_offset, mask); #endif // __ESIMD_FORCE_STATELESS_MEM diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/prefetch.hpp b/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/prefetch.hpp index 3cda02b9c0248..2dc6360ed3f40 100644 --- a/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/prefetch.hpp +++ b/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/prefetch.hpp @@ -41,7 +41,7 @@ bool verify(const T *In, const T *Out, int N, int Size, int VS) { return NumErrors == 0; } -template bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) { @@ -55,8 +55,7 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) { std::cout << "Running case: T=" << esimd_test::type_name() << ", N=" << N << ", VS=" << VS << ", MaskStride=" << MaskStride << ", Groups=" << Groups << ", Threads=" << Threads - << ", use_mask=" << UseMask << ", use_properties=" << UseProperties - << std::endl; + << ", use_mask=" << UseMask << std::endl; uint16_t Size = Groups * Threads * N; @@ -88,144 +87,72 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) { simd Vals; if constexpr (VS > 1) { // VS > 1 requires specifying if constexpr (UseMask) { - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred, Props); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Pred_1, Props); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset, Pred_1, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 
0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Pred_1); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset, Pred_1); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Pred, Props); + else if (GlobalID % 4 == 1) + __ESIMD_NS::prefetch(In, Pred_1, Props); + else if (GlobalID % 4 == 2) + __ESIMD_NS::prefetch(In, ByteOffset, Pred_1, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Pred, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Pred, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Pred, Props); } } else { // UseMask is false - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Props); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Props); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets); - else // ByteOffset - simd_view - 
prefetch(In, ByteOffsetsView); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Props); + else if (GlobalID % 4 == 1) + __ESIMD_NS::prefetch(In, Props); + else if (GlobalID % 4 == 2) + __ESIMD_NS::prefetch(In, ByteOffset, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Props); } } } else { // if (VS == 1) then can often be omitted - test it here. // C++ FE do simd to simd_view matching. if constexpr (UseMask) { - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred, Props); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Pred_1, Props); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset, Pred_1, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Pred_1); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset, Pred_1); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets, Pred); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Pred); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Pred, Props); + else if (GlobalID % 4 == 1) + __ESIMD_NS::prefetch(In, Pred_1, Props); + else if (GlobalID % 4 
== 2) + __ESIMD_NS::prefetch(In, ByteOffset, Pred_1, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Pred, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(In, ByteOffsets, Pred, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Pred, Props); } } else { // UseMask is false - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - __ESIMD_NS::prefetch(In, ByteOffsets, Props); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In, Props); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - __ESIMD_NS::prefetch(In, ByteOffsets, Props); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView, Props); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + __ESIMD_NS::prefetch(In, ByteOffsets, Props); + else if (GlobalID % 4 == 1) + __ESIMD_NS::prefetch(In, Props); + else if (GlobalID % 4 == 2) + __ESIMD_NS::prefetch(In, ByteOffset, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Props); } else { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets); - else if (GlobalID % 4 == 1) - __ESIMD_NS::prefetch(In); - else if (GlobalID % 4 == 2) - __ESIMD_NS::prefetch(In, ByteOffset); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets); - else // ByteOffset - simd_view - prefetch(In, ByteOffsetsView); - } + if (GlobalID % 2 == 0) // ByteOffset - simd + __ESIMD_NS::prefetch(In, ByteOffsets, Props); + else // ByteOffset - simd_view + prefetch(In, ByteOffsetsView, Props); } } } // end if (VS == 1) @@ -250,34 +177,36 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) { template 
bool testUSM(queue Q) { constexpr bool UseMask = true; - constexpr bool UseProperties = true; properties CacheProps{cache_hint_L1, cache_hint_L2}; bool Passed = true; - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 3, CacheProps); - - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 2, CacheProps); - Passed &= testUSM(Q, 3, CacheProps); - Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); + Passed &= testUSM(Q, 2, CacheProps); // Check VS > 1. GPU supports only dwords and qwords in this mode. 
if constexpr (sizeof(T) >= 4) { - Passed &= testUSM(Q, 3, CacheProps); - Passed &= testUSM(Q, 3, CacheProps); - Passed &= testUSM(Q, 3, CacheProps); - Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); + Passed &= testUSM(Q, 3, CacheProps); } return Passed; } -template bool testACC(queue Q, uint32_t MaskStride, PropertiesT) { @@ -291,8 +220,7 @@ bool testACC(queue Q, uint32_t MaskStride, PropertiesT) { std::cout << "Running case: T=" << esimd_test::type_name() << ", N=" << N << ", VS=" << VS << ", MaskStride=" << MaskStride << ", Groups=" << Groups << ", Threads=" << Threads - << ", use_mask=" << UseMask << ", use_properties=" << UseProperties - << std::endl; + << ", use_mask=" << UseMask << std::endl; uint16_t Size = Groups * Threads * N; @@ -327,144 +255,72 @@ bool testACC(queue Q, uint32_t MaskStride, PropertiesT) { simd Vals; if constexpr (VS > 1) { // VS > 1 requires specifying if constexpr (UseMask) { - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred, Props); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Pred_1, Props); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset, Pred_1, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Pred_1); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset, Pred_1); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred); - } else { - if (GlobalID % 2 == 0) // ByteOffset - 
simd - prefetch(InAcc, ByteOffsets, Pred); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Pred, Props); + else if (GlobalID % 4 == 1) + prefetch(InAcc, Pred_1, Props); + else if (GlobalID % 4 == 2) + prefetch(InAcc, ByteOffset, Pred_1, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Pred, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Pred, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Pred, Props); } } else { // UseMask is false - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Props); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Props); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets); - else if (GlobalID % 4 == 1) - prefetch(In); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Props); + else if (GlobalID % 4 == 1) + prefetch(InAcc, Props); + else if (GlobalID % 4 == 2) + prefetch(InAcc, ByteOffset, Props); + else // ByteOffset - simd_view + prefetch(InAcc, 
ByteOffsetsView, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Props); } } } else { // if (VS == 1) then can often be omitted - test it // here. C++ FE do simd to simd_view matching. if constexpr (UseMask) { - if constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred, Props); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Pred_1, Props); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset, Pred_1, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred, Props); - } - } else { // UseProperties is false - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Pred_1); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset, Pred_1); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Pred); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Pred); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Pred, Props); + else if (GlobalID % 4 == 1) + prefetch(InAcc, Pred_1, Props); + else if (GlobalID % 4 == 2) + prefetch(InAcc, ByteOffset, Pred_1, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Pred, Props); + } else { + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Pred, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Pred, Props); } } else { // UseMask is false - if 
constexpr (UseProperties) { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Props); - else if (GlobalID % 4 == 1) - prefetch(InAcc, Props); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Props); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets, Props); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView, Props); - } + if constexpr (sizeof(T) >= 4) { + if (GlobalID % 4 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Props); + else if (GlobalID % 4 == 1) + prefetch(InAcc, Props); + else if (GlobalID % 4 == 2) + prefetch(InAcc, ByteOffset, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Props); } else { - if constexpr (sizeof(T) >= 4) { - if (GlobalID % 4 == 0) // ByteOffset - simd - prefetch(InAcc, ByteOffsets); - else if (GlobalID % 4 == 1) - prefetch(InAcc); - else if (GlobalID % 4 == 2) - prefetch(InAcc, ByteOffset); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView); - } else { - if (GlobalID % 2 == 0) // ByteOffset - simd - prefetch(In, ByteOffsets); - else // ByteOffset - simd_view - prefetch(InAcc, ByteOffsetsView); - } + if (GlobalID % 2 == 0) // ByteOffset - simd + prefetch(InAcc, ByteOffsets, Props); + else // ByteOffset - simd_view + prefetch(InAcc, ByteOffsetsView, Props); } } } // end if (VS == 1) @@ -490,37 +346,35 @@ bool testACC(queue Q, uint32_t MaskStride, PropertiesT) { template bool testACC(queue Q) { constexpr bool UseMask = true; - constexpr bool UseProperties = true; properties CacheProps{cache_hint_L1, cache_hint_L2}; bool Passed = true; - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= 
testACC(Q, 3, CacheProps); - - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 2, CacheProps); - Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 2, CacheProps); + Passed &= testACC(Q, 3, CacheProps); // Check VS > 1. GPU supports only dwords and qwords in this mode. if constexpr (sizeof(T) >= 4) { - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); - Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); + Passed &= testACC(Q, 3, CacheProps); } return Passed; } diff --git a/sycl/test/esimd/memory_properties.cpp b/sycl/test/esimd/memory_properties.cpp index 0adb42749984f..8db5811cfe262 100644 --- a/sycl/test/esimd/memory_properties.cpp +++ b/sycl/test/esimd/memory_properties.cpp @@ -1609,12 +1609,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, // 4) prefetch(usm, offset): same as (1) and (2) above, but with VS > 1. 
// 1) prefetch(usm, offsets): offsets is simd or simd_view - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) - prefetch(ptrf, ioffset_n32); - prefetch(ptrf, ioffset_n32_view); - - prefetch(ptrf, loffset_n32); - prefetch(ptrf, loffset_n32_view); // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) prefetch(ptrf, ioffset_n32, props_cache_load); @@ -1624,13 +1618,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, prefetch(ptrf, loffset_n32_view, props_cache_load); // 2) prefetch(usm, offsets, mask): offsets is simd or simd_view - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) - prefetch(ptrf, ioffset_n32, mask_n32); - prefetch(ptrf, ioffset_n32_view, mask_n32); - - prefetch(ptrf, loffset_n32, mask_n32); - prefetch(ptrf, loffset_n32_view, mask_n32); - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) prefetch(ptrf, ioffset_n32, mask_n32, props_cache_load); prefetch(ptrf, ioffset_n32_view, mask_n32, props_cache_load); @@ -1639,16 +1626,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, prefetch(ptrf, loffset_n32_view, mask_n32, props_cache_load); // 3) prefetch(usm, offset): offset is scalar - // CHECK-COUNT-8: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) - __ESIMD_NS::prefetch(ptrf); - __ESIMD_NS::prefetch(ptrf, byte_offset32); - __ESIMD_NS::prefetch(ptrf, byte_offset64); - 
__ESIMD_NS::prefetch(ptrf, mask_n1); - __ESIMD_NS::prefetch(ptrf, byte_offset32, mask_n1); - __ESIMD_NS::prefetch(ptrf, byte_offset64, mask_n1); - __ESIMD_NS::prefetch(ptrf, byte_offset32, mask_n1); - __ESIMD_NS::prefetch(ptrf, byte_offset64, mask_n1); - // CHECK-COUNT-8: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) __ESIMD_NS::prefetch(ptrf, byte_offset32, props_cache_load); __ESIMD_NS::prefetch(ptrf, byte_offset64, props_cache_load); @@ -1660,13 +1637,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, __ESIMD_NS::prefetch(ptrf, byte_offset64, mask_n1, props_cache_load); // 4) prefetch(usm, ...): same as (1), (2) above, but with VS > 1. - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) - prefetch(ptrf, ioffset_n16); - prefetch(ptrf, ioffset_n16_view); - - prefetch(ptrf, loffset_n16); - prefetch(ptrf, loffset_n16_view); - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) prefetch(ptrf, ioffset_n16, props_cache_load); prefetch(ptrf, ioffset_n16_view, props_cache_load); @@ -1674,13 +1644,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, prefetch(ptrf, loffset_n16, props_cache_load); prefetch(ptrf, loffset_n16_view, props_cache_load); - // CHECK-COUNT-4: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) - prefetch(ptrf, ioffset_n16, mask_n16); - prefetch(ptrf, ioffset_n16_view, mask_n16); - - prefetch(ptrf, loffset_n16, mask_n16); - prefetch(ptrf, loffset_n16_view, mask_n16); - // CHECK-COUNT-4: call void 
@llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) prefetch(ptrf, ioffset_n16, mask_n16, props_cache_load); prefetch(ptrf, ioffset_n16_view, mask_n16, props_cache_load); @@ -1688,9 +1651,9 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, prefetch(ptrf, loffset_n16, mask_n16, props_cache_load); prefetch(ptrf, loffset_n16_view, mask_n16, props_cache_load); - // CHECK-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) - __ESIMD_NS::prefetch(ptrf, 0); - __ESIMD_NS::prefetch(ptrf, 0, 1); + // CHECK-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) + __ESIMD_NS::prefetch(ptrf, 0, props_cache_load); + __ESIMD_NS::prefetch(ptrf, 0, 1, props_cache_load); // Test Acc prefetch using this plan: // 1) prefetch(acc, offsets): offsets is simd or simd_view @@ -1699,36 +1662,18 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, // 4) prefetch(acc, offset): same as (1) and (2) above, but with VS > 1. 
// 1) prefetch(acc, offsets): offsets is simd or simd_view - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v32i1.v32i32(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) - prefetch(acc, ioffset_n32); - prefetch(acc, ioffset_n32_view); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v32i1.v32i32(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i32> {{[^)]+}}, i32 {{[^)]+}}) // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) prefetch(acc, ioffset_n32, props_cache_load); prefetch(acc, ioffset_n32_view, props_cache_load); // 2) prefetch(acc, offsets, mask): offsets is simd or simd_view - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v32i1.v32i32(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) - prefetch(acc, ioffset_n32, mask_n32); - prefetch(acc, ioffset_n32_view, mask_n32); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v32i1.v32i32(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i32> {{[^)]+}}, i32 {{[^)]+}}) // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v32i1.v32i64(<32 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <32 x i64> {{[^)]+}}, i32 0) prefetch(acc, ioffset_n32, mask_n32, props_cache_load); prefetch(acc, 
ioffset_n32_view, mask_n32, props_cache_load); // 3) prefetch(acc, offset): offset is scalar - // CHECK-STATEFUL-COUNT-5: call void @llvm.genx.lsc.prefetch.bti.v1i1.v1i32(<1 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-5: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) - prefetch(acc); - prefetch(acc, byte_offset32); - prefetch(acc, mask_n1); - prefetch(acc, byte_offset32, mask_n1); - prefetch(acc, byte_offset32, mask_n1); - // CHECK-STATEFUL-COUNT-5: call void @llvm.genx.lsc.prefetch.bti.v1i1.v1i32(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i32> {{[^)]+}}, i32 {{[^)]+}}) // CHECK-STATELESS-COUNT-5: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 1, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) prefetch(acc, byte_offset32, props_cache_load); @@ -1738,28 +1683,18 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void test_prefetch(AccType &acc, float *ptrf, prefetch(acc, byte_offset32, mask_n1, props_cache_load); // 4) prefetch(usm, ...): same as (1), (2) above, but with VS > 1. 
- // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) - prefetch(acc, ioffset_n16); - prefetch(acc, ioffset_n16_view); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i32> {{[^)]+}}, i32 {{[^)]+}}) // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) prefetch(acc, ioffset_n16, props_cache_load); prefetch(acc, ioffset_n16_view, props_cache_load); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) - prefetch(acc, ioffset_n16, mask_n16); - prefetch(acc, ioffset_n16_view, mask_n16); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i32> {{[^)]+}}, i32 {{[^)]+}}) // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v16i1.v16i64(<16 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 2, i8 1, i8 0, <16 x i64> {{[^)]+}}, i32 0) prefetch(acc, ioffset_n16, mask_n16, props_cache_load); prefetch(acc, ioffset_n16_view, mask_n16, props_cache_load); - // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v1i1.v1i32(<1 x i1> 
{{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i32> {{[^)]+}}, i32 {{[^)]+}}) - // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 1, i8 2, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) - prefetch(acc, 0); - prefetch(acc, 0, 1); + // CHECK-STATEFUL-COUNT-2: call void @llvm.genx.lsc.prefetch.bti.v1i1.v1i32(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i32> {{[^)]+}}, i32 {{[^)]+}}) + // CHECK-STATELESS-COUNT-2: call void @llvm.genx.lsc.prefetch.stateless.v1i1.v1i64(<1 x i1> {{[^)]+}}, i8 0, i8 2, i8 1, i16 1, i32 0, i8 3, i8 7, i8 2, i8 0, <1 x i64> {{[^)]+}}, i32 0) + prefetch(acc, 0, props_cache_load); + prefetch(acc, 0, 1, props_cache_load); } \ No newline at end of file