Review notes (text ahead of the first "diff --git" header is commentary and
is skipped by patch tools):

- shuffle() / unshuffle(): the mm_blend_epi16 mask is 0xf0, not 15. Set mask
  bits select 16-bit words from the SECOND operand, so 0xf0 takes the upper
  64 bits from the second operand and the lower 64 bits from the first.
  With 15 (0x0f), shuffle() produced wstate[1] = [BAFE] instead of [HGDC]
  and unshuffle() produced wstate[0] = [GHEF] instead of [ABCD] (ordered
  low to high), dropping half of the state words.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Blank context lines were placed so every hunk matches
  its @@ counts; confirm whitespace against the repository before applying.
- NOTE(review): the bare `template` lines and argument-less `array_cast(...)`
  calls look like angle-bracket lists lost in extraction -- TODO confirm and
  restore from the repository.

diff --git a/include/bitcoin/system/hash/sha/algorithm.hpp b/include/bitcoin/system/hash/sha/algorithm.hpp
index 84b2b00d6b..48caa62dac 100644
--- a/include/bitcoin/system/hash/sha/algorithm.hpp
+++ b/include/bitcoin/system/hash/sha/algorithm.hpp
@@ -365,6 +365,19 @@ class algorithm
     INLINE static void schedule_native(xbuffer_t& xbuffer) NOEXCEPT;
     INLINE static void schedule_native(buffer_t& buffer) NOEXCEPT;
 
+    template
+    INLINE static void round_native(wstate_t& state,
+        const wbuffer_t& wk) NOEXCEPT;
+
+    INLINE static void shuffle(wstate_t& wstate) NOEXCEPT;
+    INLINE static void unshuffle(wstate_t& wstate) NOEXCEPT;
+    INLINE static void summarize_native(wstate_t& out,
+        const wstate_t& in) NOEXCEPT;
+
+    template
+    INLINE static void compress_native(wstate_t& state,
+        const wbuffer_t& wbuffer) NOEXCEPT;
+
     template
     INLINE static void compress_native(xstate_t& xstate,
         const xbuffer_t& xbuffer) NOEXCEPT;
diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp
index ada7bae8f4..f9da36fd0f 100644
--- a/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp
+++ b/include/bitcoin/system/impl/hash/sha/algorithm_compress.ipp
@@ -173,7 +173,6 @@ template
 constexpr void CLASS::
 compress_(auto& state, const auto& buffer) NOEXCEPT
 {
-    // SHA-NI/256: 64/4 = 16 quad rounds, 8/4 = 2 state elements.
     // This is a copy (state type varies due to vectorization).
     const auto start = state;
 
diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp
index a7c2d20e4b..a26366ea48 100644
--- a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp
+++ b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp
@@ -35,6 +35,10 @@ namespace libbitcoin {
 namespace system {
 namespace sha {
 
+
+// schedule
+// ----------------------------------------------------------------------------
+// protected
 
 TEMPLATE
 template
@@ -43,19 +47,21 @@ prepare_native(wbuffer_t& wbuffer) NOEXCEPT
 {
     if constexpr (SHA::strength == 160)
     {
-        ////static_assert(false, "sha160 not implemented");
-    }
-    else if constexpr (use_neon)
-    {
-        static_assert(SHA::strength == 256);
-
-        ////static_assert(false, "neon not implemented");
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+        }
     }
-    else if constexpr (use_shani)
+    else if constexpr (SHA::strength == 256)
     {
-        static_assert(SHA::strength == 256);
-
-        wbuffer[Round] = mm_sha256msg2_epu32
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+            wbuffer[Round] = mm_sha256msg2_epu32
         (
             mm_add_epi32
             (
@@ -70,6 +76,7 @@ prepare_native(wbuffer_t& wbuffer) NOEXCEPT
             ),
             wbuffer[Round - 1]
         );
+        }
     }
 }
 
@@ -101,16 +108,12 @@ schedule(wbuffer_t& wbuffer) NOEXCEPT
     konstant(array_cast(wbuffer));
 }
 
-// schedule
-// ----------------------------------------------------------------------------
-// protected
-
 TEMPLATE
 INLINE void CLASS::
 schedule_native(buffer_t& buffer) NOEXCEPT
 {
     // neon and sha160 not yet implemented, sha512 is not native.
-    if constexpr (SHA::strength != 160 && SHA::strength != 512 && !use_neon)
+    if constexpr (SHA::strength == 256 && !use_neon)
     {
         schedule(array_cast(buffer));
     }
@@ -133,6 +136,134 @@ schedule_native(xbuffer_t& xbuffer) NOEXCEPT
 // ----------------------------------------------------------------------------
 // protected
 
+TEMPLATE
+template
+INLINE void CLASS::
+round_native(wstate_t& state,
+    const wbuffer_t& wk) NOEXCEPT
+{
+    if constexpr (SHA::strength == 160)
+    {
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+        }
+    }
+    else if constexpr (SHA::strength == 256)
+    {
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+            // Process wk[Round][0..1], [HGDC][FEBA] (initial state)
+            state[1] = mm_sha256rnds2_epu32(state[1], state[0], wk[Round]);
+
+            // Process wk[Round][2..3] (shifted down)
+            state[0] = mm_sha256rnds2_epu32(state[0], state[1],
+                mm_shuffle_epi32(wk[Round], 0x0e));
+        }
+    }
+}
+
+TEMPLATE
+INLINE void CLASS::
+summarize_native(wstate_t& out,
+    const wstate_t& in) NOEXCEPT
+{
+    if constexpr (SHA::strength == 160)
+    {
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+        }
+    }
+    else if constexpr (SHA::strength == 256)
+    {
+        if constexpr (use_neon)
+        {
+        }
+        else if constexpr (use_shani)
+        {
+            out[0] = mm_add_epi32(out[0], in[0]);
+            out[1] = mm_add_epi32(out[1], in[1]);
+        }
+    }
+}
+
+TEMPLATE
+INLINE void CLASS::
+shuffle(wstate_t& wstate) NOEXCEPT
+{
+    // Change wstate to mm_sha256rnds2_epu32 expected form:
+    // [ABCD][EFGH] -> [FEBA][HGDC] (ordered low to high).
+    const auto t1 = mm_shuffle_epi32(wstate[0], 0xb1);
+    const auto t2 = mm_shuffle_epi32(wstate[1], 0x1b);
+    wstate[0] = mm_alignr_epi8(t1, t2, 8);
+    wstate[1] = mm_blend_epi16(t2, t1, 0xf0);
+}
+
+TEMPLATE
+INLINE void CLASS::
+unshuffle(wstate_t& wstate) NOEXCEPT
+{
+    // Restore wstate to normal form:
+    // [FEBA][HGDC] -> [ABCD][EFGH] (ordered low to high).
+    const auto t1 = mm_shuffle_epi32(wstate[0], 0x1b);
+    const auto t2 = mm_shuffle_epi32(wstate[1], 0xb1);
+    wstate[0] = mm_blend_epi16(t1, t2, 0xf0);
+    wstate[1] = mm_alignr_epi8(t2, t1, 8);
+}
+
+TEMPLATE
+template
+INLINE void CLASS::
+compress_native(wstate_t& wstate,
+    const wbuffer_t& wbuffer) NOEXCEPT
+{
+    // Shuffle and unshuffle can be done outside of all blocks, but this would
+    // leave state in a non-normal form, so presently absorbing that cost.
+    shuffle(wstate);
+
+    // This is a copy.
+    const auto start = wstate;
+
+    round_native< 0, Lane>(wstate, wbuffer);
+    round_native< 1, Lane>(wstate, wbuffer);
+    round_native< 2, Lane>(wstate, wbuffer);
+    round_native< 3, Lane>(wstate, wbuffer);
+    round_native< 4, Lane>(wstate, wbuffer);
+    round_native< 5, Lane>(wstate, wbuffer);
+    round_native< 6, Lane>(wstate, wbuffer);
+    round_native< 7, Lane>(wstate, wbuffer);
+    round_native< 8, Lane>(wstate, wbuffer);
+    round_native< 9, Lane>(wstate, wbuffer);
+    round_native<10, Lane>(wstate, wbuffer);
+    round_native<11, Lane>(wstate, wbuffer);
+    round_native<12, Lane>(wstate, wbuffer);
+    round_native<13, Lane>(wstate, wbuffer);
+    round_native<14, Lane>(wstate, wbuffer);
+    round_native<15, Lane>(wstate, wbuffer);
+
+    if constexpr (SHA::rounds == 80)
+    {
+        round_native<16, Lane>(wstate, wbuffer);
+        round_native<17, Lane>(wstate, wbuffer);
+        round_native<18, Lane>(wstate, wbuffer);
+        round_native<19, Lane>(wstate, wbuffer);
+    }
+
+    // This is just a vectorized version of summarize().
+    summarize_native(wstate, start);
+
+    // See above comments on shuffle().
+    unshuffle(wstate);
+}
+
 TEMPLATE
 template
 INLINE void CLASS::
@@ -157,8 +288,17 @@ template
 INLINE void CLASS::
 compress_native(state_t& state, const buffer_t& buffer) NOEXCEPT
 {
-    // TODO: Single block compression.
-    compress_(state, buffer);
+    // TODO: sha160 state is too small to array cast into two xwords.
+    // neon and sha160 not yet implemented, sha512 is not native.
+    if constexpr (SHA::strength == 256 && !use_neon)
+    {
+        compress_native(array_cast(state),
+            array_cast(buffer));
+    }
+    else
+    {
+        compress_(state, buffer);
+    }
 }
 
 } // namespace sha
diff --git a/include/bitcoin/system/intrinsics/xcpu/defines.hpp b/include/bitcoin/system/intrinsics/xcpu/defines.hpp
index 79e5b06334..7594327053 100644
--- a/include/bitcoin/system/intrinsics/xcpu/defines.hpp
+++ b/include/bitcoin/system/intrinsics/xcpu/defines.hpp
@@ -133,6 +133,8 @@ BC_POP_WARNING()
     #define mm_extract_epi32(a, Lane) {}
     #define mm_extract_epi64(a, Lane) {}
     #define mm_shuffle_epi8(a, mask) (a)
+    #define mm_shuffle_epi32(a, mask) (a)
+    #define mm_blend_epi16(a, b, mask) (a)
     #define mm_load_si128(a) {}
     #define mm_loadu_si128(a) {}
     #define mm_store_si128(memory, a)
@@ -167,6 +169,8 @@ BC_POP_WARNING()
     #define mm_extract_epi32(a, Lane) _mm_extract_epi32(a, Lane)
     #define mm_extract_epi64(a, Lane) _mm_extract_epi64(a, Lane) // undefined for X32
     #define mm_shuffle_epi8(a, mask) _mm_shuffle_epi8(a, mask)
+    #define mm_shuffle_epi32(a, mask) _mm_shuffle_epi32(a, mask)
+    #define mm_blend_epi16(a, b, mask) _mm_blend_epi16(a, b, mask)
     #define mm_load_si128(a) _mm_load_si128(a)
     #define mm_loadu_si128(a) _mm_loadu_si128(a)
     #define mm_store_si128(memory, a) _mm_store_si128(memory, a)