Skip to content

Commit

Permalink
Add native sha compression.
Browse files Browse the repository at this point in the history
  • Loading branch information
evoskuil committed Nov 26, 2024
1 parent fddf944 commit 16330a6
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 19 deletions.
11 changes: 11 additions & 0 deletions include/bitcoin/system/hash/sha/algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,17 @@ class algorithm
INLINE static void schedule_native(xbuffer_t<xWord>& xbuffer) NOEXCEPT;
INLINE static void schedule_native(buffer_t& buffer) NOEXCEPT;

/// Apply the native round selected by `Round` to `state`, reading the
/// element wk[Round] of the supplied buffer.
/// NOTE(review): `Lane` is not referenced by the definition visible in this
/// change - confirm whether it is reserved for later use.
template<size_t Round, size_t Lane>
INLINE static void round_native(wstate_t<xint128_t>& state,
const wbuffer_t<xint128_t>& wk) NOEXCEPT;

////INLINE static void summarize_native(wstate_t<xint128_t>& out,
//// const wstate_t<xint128_t>& in) NOEXCEPT;

/// Native compression over 128 bit extended words: runs round_native for
/// rounds 0..15 (and 16..19 when SHA::rounds == 80) against `wbuffer`, then
/// passes the result and a copy of the initial state to summarize.
template <size_t Lane>
INLINE static void compress_native(wstate_t<xint128_t>& state,
const wbuffer_t<xint128_t>& wbuffer) NOEXCEPT;

template <typename xWord, size_t Lane>
INLINE static void compress_native(xstate_t<xWord>& xstate,
const xbuffer_t<xWord>& xbuffer) NOEXCEPT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ template <size_t Lane>
constexpr void CLASS::
compress_(auto& state, const auto& buffer) NOEXCEPT
{
// SHA-NI/256: 64/4 = 16 quad rounds, 8/4 = 2 state elements.
// This is a copy (state type varies due to vectorization).
const auto start = state;

Expand Down
142 changes: 124 additions & 18 deletions include/bitcoin/system/impl/hash/sha/algorithm_native.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
namespace libbitcoin {
namespace system {
namespace sha {

// schedule
// ----------------------------------------------------------------------------
// protected

TEMPLATE
template<size_t Round>
Expand All @@ -43,19 +47,21 @@ prepare_native(wbuffer_t<xint128_t>& wbuffer) NOEXCEPT
{
if constexpr (SHA::strength == 160)
{
////static_assert(false, "sha160 not implemented");
}
else if constexpr (use_neon)
{
static_assert(SHA::strength == 256);

////static_assert(false, "neon not implemented");
if constexpr (use_neon)
{
}
else if constexpr (use_shani)
{
}
}
else if constexpr (use_shani)
else if constexpr (SHA::strength == 256)
{
static_assert(SHA::strength == 256);

wbuffer[Round] = mm_sha256msg2_epu32
if constexpr (use_neon)
{
}
else if constexpr (use_shani)
{
wbuffer[Round] = mm_sha256msg2_epu32
(
mm_add_epi32
(
Expand All @@ -70,6 +76,7 @@ prepare_native(wbuffer_t<xint128_t>& wbuffer) NOEXCEPT
),
wbuffer[Round - 1]
);
}
}
}

Expand Down Expand Up @@ -101,16 +108,12 @@ schedule(wbuffer_t<xint128_t>& wbuffer) NOEXCEPT
konstant(array_cast<word_t>(wbuffer));
}

// schedule
// ----------------------------------------------------------------------------
// protected

TEMPLATE
INLINE void CLASS::
schedule_native(buffer_t& buffer) NOEXCEPT
{
// neon and sha160 not yet implemented, sha512 is not native.
if constexpr (SHA::strength != 160 && SHA::strength != 512 && !use_neon)
if constexpr (SHA::strength == 256 && !use_neon)
{
schedule(array_cast<xint128_t>(buffer));
}
Expand All @@ -133,6 +136,100 @@ schedule_native(xbuffer_t<xWord>& xbuffer) NOEXCEPT
// ----------------------------------------------------------------------------
// protected

TEMPLATE
template<size_t Round, size_t Lane>
INLINE void CLASS::
round_native(wstate_t<xint128_t>& state,
    const wbuffer_t<xint128_t>& wk) NOEXCEPT
{
    // Only the sha256 + sha-ni combination produces code. The sha160 and
    // neon combinations are intentionally empty (nothing is emitted for them),
    // so every other instantiation of this function is a no-op.
    // Lane is not referenced by this body.
    if constexpr (SHA::strength == 256)
    {
        if constexpr (!use_neon && use_shani)
        {
            // state[1] is updated first, so that the second call operates
            // on the new state[1] together with the original state[0].
            state[1] = mm_sha256rnds2_epu32(state[1], state[0], wk[Round]);

            // The second call consumes the 0x0e shuffle of the same element.
            state[0] = mm_sha256rnds2_epu32(state[0], state[1],
                mm_shuffle_epi32(wk[Round], 0x0e));
        }
    }
}

////TEMPLATE
////INLINE void CLASS::
////summarize_native(wstate_t<xint128_t>& out,
//// const wstate_t<xint128_t>& in) NOEXCEPT
////{
//// if constexpr (SHA::strength == 160)
//// {
//// if constexpr (use_neon)
//// {
//// }
//// else if constexpr (use_shani)
//// {
//// }
//// }
//// else if constexpr (SHA::strength == 256)
//// {
//// if constexpr (use_neon)
//// {
//// }
//// else if constexpr (use_shani)
//// {
//// out[0] = mm_add_epi32(out[0], in[0]);
//// out[1] = mm_add_epi32(out[1], in[1]);
//// }
//// }
////}

TEMPLATE
template <size_t Lane>
INLINE void CLASS::
compress_native(wstate_t<xint128_t>& wstate,
    const wbuffer_t<xint128_t>& wbuffer) NOEXCEPT
{
    // Copy of the incoming state, handed to summarize once rounds complete.
    const auto initial = wstate;

    // Rounds 0..15 are applied unconditionally.
    round_native< 0, Lane>(wstate, wbuffer);
    round_native< 1, Lane>(wstate, wbuffer);
    round_native< 2, Lane>(wstate, wbuffer);
    round_native< 3, Lane>(wstate, wbuffer);
    round_native< 4, Lane>(wstate, wbuffer);
    round_native< 5, Lane>(wstate, wbuffer);
    round_native< 6, Lane>(wstate, wbuffer);
    round_native< 7, Lane>(wstate, wbuffer);
    round_native< 8, Lane>(wstate, wbuffer);
    round_native< 9, Lane>(wstate, wbuffer);
    round_native<10, Lane>(wstate, wbuffer);
    round_native<11, Lane>(wstate, wbuffer);
    round_native<12, Lane>(wstate, wbuffer);
    round_native<13, Lane>(wstate, wbuffer);
    round_native<14, Lane>(wstate, wbuffer);
    round_native<15, Lane>(wstate, wbuffer);

    // Rounds 16..19 are applied only by the 80 round algorithms.
    if constexpr (SHA::rounds == 80)
    {
        round_native<16, Lane>(wstate, wbuffer);
        round_native<17, Lane>(wstate, wbuffer);
        round_native<18, Lane>(wstate, wbuffer);
        round_native<19, Lane>(wstate, wbuffer);
    }

    // The native summarize_native is presently disabled, so both states are
    // recast to word arrays and passed to the non-native summarize.
    summarize(array_cast<word_t>(wstate), array_cast<word_t>(initial));
}

TEMPLATE
template <typename xWord, size_t Lane>
INLINE void CLASS::
Expand All @@ -157,8 +254,17 @@ template <size_t Lane>
INLINE void CLASS::
compress_native(state_t& state, const buffer_t& buffer) NOEXCEPT
{
// TODO: Single block compression.
compress_<Lane>(state, buffer);
// TODO: sha160 state is too small to array cast into two xwords.
// neon and sha160 not yet implemented, sha512 is not native.
if constexpr (SHA::strength == 256 && !use_neon)
{
compress_native<Lane>(array_cast<xint128_t>(state),
array_cast<xint128_t>(buffer));
}
else
{
compress_<Lane>(state, buffer);
}
}

} // namespace sha
Expand Down
2 changes: 2 additions & 0 deletions include/bitcoin/system/intrinsics/xcpu/defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ BC_POP_WARNING()
#define mm_extract_epi32(a, Lane) {}
#define mm_extract_epi64(a, Lane) {}
#define mm_shuffle_epi8(a, mask) (a)
#define mm_shuffle_epi32(a, mask) (a)
#define mm_load_si128(a) {}
#define mm_loadu_si128(a) {}
#define mm_store_si128(memory, a)
Expand Down Expand Up @@ -167,6 +168,7 @@ BC_POP_WARNING()
#define mm_extract_epi32(a, Lane) _mm_extract_epi32(a, Lane)
#define mm_extract_epi64(a, Lane) _mm_extract_epi64(a, Lane) // undefined for X32
#define mm_shuffle_epi8(a, mask) _mm_shuffle_epi8(a, mask)
#define mm_shuffle_epi32(a, mask) _mm_shuffle_epi32(a, mask)
#define mm_load_si128(a) _mm_load_si128(a)
#define mm_loadu_si128(a) _mm_loadu_si128(a)
#define mm_store_si128(memory, a) _mm_store_si128(memory, a)
Expand Down

0 comments on commit 16330a6

Please sign in to comment.