From 239f1aaf28fac8631c71f6ba40ac61431bdeeae7 Mon Sep 17 00:00:00 2001 From: Vyacheslav Klochkov Date: Thu, 4 Apr 2024 15:22:28 -0500 Subject: [PATCH] [ESIMD][NFC] Add function extracting L1/L2 hint for esimd/genx intrin (#13275) Signed-off-by: Klochkov, Vyacheslav N Co-authored-by: Nick Sarnie --- sycl/include/sycl/ext/intel/esimd/common.hpp | 18 +-- sycl/include/sycl/ext/intel/esimd/memory.hpp | 146 ++++++++----------- 2 files changed, 67 insertions(+), 97 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/common.hpp b/sycl/include/sycl/ext/intel/esimd/common.hpp index 155e3615dfdc4..e0ed3c2db0a15 100644 --- a/sycl/include/sycl/ext/intel/esimd/common.hpp +++ b/sycl/include/sycl/ext/intel/esimd/common.hpp @@ -545,7 +545,8 @@ template class cache_hint_wrap { } }; -constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val) { +template +constexpr bool are_all(cache_hint First, cache_hint Second) { return First == Val && Second == Val; } @@ -559,9 +560,8 @@ template constexpr bool has_cache_hints() { return L1H != cache_hint::none || L2H != cache_hint::none; } -// Currently, this is just a wrapper around 'check_cache_hint' function. -// It accepts the compile-time properties that may include cache-hints -// to be verified. +// Verifies cache-hint properties from 'PropertyListT`. The parameter 'Action' +// specifies the usage context. template void check_cache_hints() { constexpr auto L1H = @@ -576,11 +576,11 @@ void check_cache_hints() { cache_hint::streaming>() && L2H.template is_one_of() && - !are_both(L1H, L2H, cache_hint::uncached), + !are_all(L1H, L2H), "unsupported cache hint"); } else if constexpr (Action == cache_action::load) { static_assert( - are_both(L1H, L2H, cache_hint::none) || + are_all(L1H, L2H) || (L1H.template is_one_of() && L2H.template is_one_of(L1H, L2H) || + are_all(L1H, L2H) || (L1H.template is_one_of() && @@ -597,7 +597,7 @@ void check_cache_hints() { cache_hint::write_back>()), "unsupported cache hint"); } else if constexpr (Action == cache_action::atomic) { - static_assert(are_both(L1H, L2H, cache_hint::none) || + static_assert(are_all(L1H, L2H) || (L1H == cache_hint::uncached && L2H.template is_one_of()), diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index 5edd6d657b501..9193b7daf42a0 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -95,6 +95,20 @@ ESIMD_INLINE simd lsc_format_ret(simd Vals) { } } +/// Extracts a cache hint with the given 'Level' to pass it to +/// ESIMD/GENX intrinsics. If `PropertyListT` does not have the requested +/// cache-hint, then 'cache_hint::none' is returned. +template +constexpr cache_hint getCacheHintForIntrin() { + static_assert(Level == cache_level::L1 || Level == cache_level::L2, + "ESIMD/GENX intrinsics accept only L1/L2 cache hints"); + if constexpr (Level == cache_level::L1) { + return getPropertyValue(cache_hint::none); + } else { + return getPropertyValue(cache_hint::none); + } +} + /// USM pointer gather. /// Supported platforms: DG2, PVC /// VISA instruction: lsc_load.ugm @@ -123,10 +137,8 @@ __ESIMD_API simd gather_impl(const T *p, simd offsets, check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -167,10 +179,8 @@ __ESIMD_API void scatter_impl(T *p, simd offsets, check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -904,10 +914,8 @@ block_load_impl(const T *p, simd_mask<1> pred, simd pass_thru) { using LoadElemT = __ESIMD_DNS::__raw_t< std::conditional_t>>; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; @@ -1005,10 +1013,8 @@ __ESIMD_API using LoadElemT = __ESIMD_DNS::__raw_t< std::conditional_t>>; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size ActualDS = @@ -1105,10 +1111,8 @@ __ESIMD_API using LoadElemT = __ESIMD_DNS::__raw_t< std::conditional_t>>; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size ActualDS = @@ -1165,10 +1169,8 @@ block_store_impl(T *p, simd vals, simd_mask<1> pred) { using StoreType = __ESIMD_DNS::__raw_t< std::conditional_t>>; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size ActualDS = @@ -1230,10 +1232,8 @@ __ESIMD_API using StoreElemT = __ESIMD_DNS::__raw_t< std::conditional_t>>; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size ActualDS = @@ -2586,10 +2586,8 @@ scatter_impl(AccessorTy acc, simd offsets, simd vals, check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -2686,10 +2684,8 @@ gather_impl(AccessorT acc, simd byte_offsets, constexpr lsc_vector_size LSCVS = to_lsc_vector_size(); constexpr auto Transposed = lsc_data_order::nontranspose; using MsgT = typename lsc_expand_type::type; - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); auto SI = get_surface_index(acc); simd ByteOffsets32 = convert(byte_offsets); simd PassThruExpanded = lsc_format_input(pass_thru); @@ -2793,10 +2789,8 @@ __ESIMD_API void prefetch_impl(const T *p, simd byte_offsets, check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -2817,10 +2811,8 @@ prefetch_impl(const T *p, Toffset offset, simd_mask<1> pred) { check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = finalize_data_size(); @@ -2872,10 +2864,8 @@ prefetch_impl(AccessorTy acc, simd byte_offsets, check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -2919,10 +2909,8 @@ prefetch_impl(AccessorTy acc, OffsetT byte_offset, simd_mask<1> pred) { check_lsc_vector_size(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = finalize_data_size(); @@ -3058,10 +3046,8 @@ __ESIMD_API simd load_2d_impl(const T *Ptr, unsigned SurfaceWidth, unsigned SurfacePitch, int X, int Y) { check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); using RawT = __raw_t; check_lsc_block_2d_restrictions(); @@ -3172,10 +3158,8 @@ __ESIMD_API void prefetch_2d_impl(const T *Ptr, unsigned SurfaceWidth, check_cache_hints(); check_lsc_block_2d_restrictions(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr lsc_data_size DS = finalize_data_size(); uintptr_t Addr = reinterpret_cast(Ptr); @@ -3220,10 +3204,8 @@ __ESIMD_API void store_2d_impl(T *Ptr, unsigned SurfaceWidth, using RawT = __raw_t; __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store, PropertyListT>(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); check_lsc_block_2d_restrictions(); constexpr lsc_data_size DS = @@ -6164,10 +6146,8 @@ atomic_update_impl(T *p, simd offsets, simd_mask pred) { check_atomic(); check_lsc_data_size(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -6208,10 +6188,8 @@ atomic_update_impl(T *p, simd offsets, simd src0, check_lsc_data_size(); check_atomic(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -6254,10 +6232,8 @@ atomic_update_impl(T *p, simd offsets, simd src0, check_lsc_data_size(); check_atomic(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -6310,10 +6286,8 @@ __ESIMD_API check_lsc_data_size(); check_atomic(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -6365,10 +6339,8 @@ __ESIMD_API check_lsc_data_size(); check_atomic(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size()); @@ -6422,10 +6394,8 @@ __ESIMD_API check_lsc_data_size(); check_atomic(); check_cache_hints(); - constexpr cache_hint L1H = - getPropertyValue(cache_hint::none); - constexpr cache_hint L2H = - getPropertyValue(cache_hint::none); + constexpr auto L1H = getCacheHintForIntrin(); + constexpr auto L2H = getCacheHintForIntrin(); constexpr uint16_t AddressScale = 1; constexpr int ImmOffset = 0; constexpr lsc_data_size EDS = expand_data_size(finalize_data_size());