Skip to content

Commit

Permalink
v3.16.4
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Jun 24, 2021
1 parent 3c5e892 commit a053690
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 26 deletions.
5 changes: 5 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ If not what makes it happen or not happen?
Change Log
----------

v3.16.4

Faster sha512 and sha256 when not using SHA CPU extension.
#329: Fixed GBT incorrect target diff in stats.

v3.16.3

#313 Fix compile error with GCC 11.
Expand Down
38 changes: 36 additions & 2 deletions algo/sha/sha256-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,15 @@ static const uint32_t K256[64] =
// CH (choose) on 128-bit vectors: for every bit position the result takes the
// bit of Y where X is 1 and the bit of Z where X is 0.
// Z appears twice in the expansion, so pass side-effect-free arguments.
#define CHs(X, Y, Z) \
   _mm_xor_si128( Z, _mm_and_si128( X, _mm_xor_si128( Y, Z ) ) )

/*
#define MAJs(X, Y, Z) \
_mm_or_si128( _mm_and_si128( X, Y ), \
_mm_and_si128( _mm_or_si128( X, Y ), Z ) )
*/

// MAJ (majority) on 128-bit vectors: a result bit is set when at least two of
// the corresponding bits of X, Y and Z are set.
// Computed as Y ^ ( (X ^ Y) & (Y ^ Z) ): when X and Y agree the AND term is
// zero and Y is returned, otherwise the result is Y ^ (Y ^ Z) = Z.
#define MAJs(X, Y, Z) \
   _mm_xor_si128( _mm_and_si128( _mm_xor_si128( Y, Z ), \
                                 _mm_xor_si128( X, Y ) ), Y )

#define BSG2_0(x) \
_mm_xor_si128( _mm_xor_si128( \
Expand Down Expand Up @@ -345,9 +351,20 @@ void sha256_4way_full( void *dst, const void *data, size_t len )
// CH (choose) on 256-bit vectors: for every bit position the result takes the
// bit of Y where X is 1 and the bit of Z where X is 0.
// Z appears twice in the expansion, so pass side-effect-free arguments.
#define CHx(X, Y, Z) \
   _mm256_xor_si256( Z, _mm256_and_si256( X, _mm256_xor_si256( Y, Z ) ) )

/*
#define MAJx(X, Y, Z) \
_mm256_or_si256( _mm256_and_si256( X, Y ), \
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
*/
/*
#define MAJx(X, Y, Z) \
_mm256_xor_si256( Y, _mm256_and_si256( _mm256_xor_si256( X, Y ), \
_mm256_xor_si256( Y, Z ) ) )
*/

// MAJ (majority) on 256-bit vectors, computed as Y ^ ( (X ^ Y) & (Y ^ Z) ).
//
// This macro is NOT self-contained. It relies on two variables that must
// exist in the scope where it is expanded:
//   Y_xor_Z - read; must already hold Y ^ Z when the macro is used.
//   X_xor_Y - written; receives X ^ Y as a side effect of the expansion.
// The Z argument is accepted for interface compatibility but does not appear
// in the expansion, so a stale Y_xor_Z silently yields a wrong result.
#define MAJx(X, Y, Z) \
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
Y_xor_Z ) )

#define BSG2_0x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
Expand Down Expand Up @@ -375,14 +392,15 @@ do { \
T1 = _mm256_add_epi32( H, mm256_add4_32( BSG2_1x(E), CHx(E, F, G), \
K, W[i] ) ); \
T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
Y_xor_Z = X_xor_Y; \
D = _mm256_add_epi32( D, T1 ); \
H = _mm256_add_epi32( T1, T2 ); \
} while (0)

static void
sha256_8way_round( sha256_8way_context *ctx, __m256i *in, __m256i r[8] )
{
register __m256i A, B, C, D, E, F, G, H;
register __m256i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
__m256i W[16];

mm256_block_bswap_32( W , in );
Expand Down Expand Up @@ -411,6 +429,8 @@ sha256_8way_round( sha256_8way_context *ctx, __m256i *in, __m256i r[8] )
H = m256_const1_64( 0x5BE0CD195BE0CD19 );
}

Y_xor_Z = _mm256_xor_si256( B, C );

SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
Expand Down Expand Up @@ -591,9 +611,20 @@ void sha256_8way_full( void *dst, const void *data, size_t len )
// CH (choose) on 512-bit vectors: for every bit position the result takes the
// bit of Y where X is 1 and the bit of Z where X is 0.
// Z appears twice in the expansion, so pass side-effect-free arguments.
#define CHx16(X, Y, Z) \
   _mm512_xor_si512( Z, _mm512_and_si512( X, _mm512_xor_si512( Y, Z ) ) )

/*
#define MAJx16(X, Y, Z) \
_mm512_or_si512( _mm512_and_si512( X, Y ), \
_mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )
*/
/*
#define MAJx16(X, Y, Z) \
_mm512_xor_si512( Y, _mm512_and_si512( _mm512_xor_si512( X, Y ), \
_mm512_xor_si512( Y, Z ) ) )
*/

// MAJ (majority) on 512-bit vectors, computed as Y ^ ( (X ^ Y) & (Y ^ Z) ).
//
// This macro is NOT self-contained. It relies on two variables that must
// exist in the scope where it is expanded:
//   Y_xor_Z - read; must already hold Y ^ Z when the macro is used.
//   X_xor_Y - written; receives X ^ Y as a side effect of the expansion.
// The Z argument is accepted for interface compatibility but does not appear
// in the expansion, so a stale Y_xor_Z silently yields a wrong result.
#define MAJx16(X, Y, Z) \
_mm512_xor_si512( Y, _mm512_and_si512( X_xor_Y = _mm512_xor_si512( X, Y ), \
Y_xor_Z ) )

#define BSG2_0x16(x) \
_mm512_xor_si512( _mm512_xor_si512( \
Expand Down Expand Up @@ -621,14 +652,15 @@ do { \
T1 = _mm512_add_epi32( H, mm512_add4_32( BSG2_1x16(E), CHx16(E, F, G), \
K, W[i] ) ); \
T2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
Y_xor_Z = X_xor_Y; \
D = _mm512_add_epi32( D, T1 ); \
H = _mm512_add_epi32( T1, T2 ); \
} while (0)

static void
sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
{
register __m512i A, B, C, D, E, F, G, H;
register __m512i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
__m512i W[16];

mm512_block_bswap_32( W , in );
Expand Down Expand Up @@ -657,6 +689,8 @@ sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
H = m512_const1_64( 0x5BE0CD195BE0CD19 );
}

Y_xor_Z = _mm512_xor_si512( B, C );

SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
Expand Down
40 changes: 32 additions & 8 deletions algo/sha/sha512-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,21 @@ static const uint64_t K512[80] =
// CH (choose) on 512-bit vectors: for every bit position the result takes the
// bit of Y where X is 1 and the bit of Z where X is 0.
// Z appears twice in the expansion, so pass side-effect-free arguments.
#define CH8W(X, Y, Z) \
   _mm512_xor_si512( Z, _mm512_and_si512( X, _mm512_xor_si512( Y, Z ) ) )

/*
#define MAJ8W(X, Y, Z) \
_mm512_or_si512( _mm512_and_si512( X, Y ), \
_mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )
*/
/* Functionally identical to the original but optimizable: the
 * subexpression X^Y from one step can be reused in the next step as Y^Z.
#define MAJ8W(X, Y, Z) \
_mm512_xor_si512( Y, _mm512_and_si512( _mm512_xor_si512( X, Y ), \
_mm512_xor_si512( Y, Z ) ) )
*/

// MAJ (majority) on 512-bit vectors, computed as Y ^ ( (X ^ Y) & (Y ^ Z) ).
//
// This macro is NOT self-contained. It relies on two variables that must
// exist in the scope where it is expanded:
//   Y_xor_Z - read; must already hold Y ^ Z when the macro is used.
//   X_xor_Y - written; receives X ^ Y as a side effect of the expansion.
// The Z argument is accepted for interface compatibility but does not appear
// in the expansion, so a stale Y_xor_Z silently yields a wrong result.
#define MAJ8W(X, Y, Z) \
_mm512_xor_si512( Y, _mm512_and_si512( X_xor_Y = _mm512_xor_si512( X, Y ), \
Y_xor_Z ) )

#define BSG8W_5_0(x) \
_mm512_xor_si512( _mm512_xor_si512( \
Expand Down Expand Up @@ -172,6 +184,7 @@ do { \
T1 = _mm512_add_epi64( H, mm512_add4_64( BSG8W_5_1(E), CH8W(E, F, G), \
K, W[i] ) ); \
T2 = _mm512_add_epi64( BSG8W_5_0(A), MAJ8W(A, B, C) ); \
Y_xor_Z = X_xor_Y; \
D = _mm512_add_epi64( D, T1 ); \
H = _mm512_add_epi64( T1, T2 ); \
} while (0)
Expand All @@ -180,7 +193,7 @@ static void
sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
{
int i;
register __m512i A, B, C, D, E, F, G, H;
register __m512i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
__m512i W[80];

mm512_block_bswap_64( W , in );
Expand Down Expand Up @@ -213,6 +226,8 @@ sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
H = m512_const1_64( 0x5BE0CD19137E2179 );
}

Y_xor_Z = _mm512_xor_si512( B, C );

for ( i = 0; i < 80; i += 8 )
{
SHA3_8WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
Expand Down Expand Up @@ -319,22 +334,28 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )

// SHA-512 4 way 64 bit

/*

// CH (choose) on 256-bit vectors: for every bit position the result takes the
// bit of Y where X is 1 and the bit of Z where X is 0.
// Z appears twice in the expansion, so pass side-effect-free arguments.
#define CH(X, Y, Z) \
   _mm256_xor_si256( Z, _mm256_and_si256( X, _mm256_xor_si256( Y, Z ) ) )

/*
#define MAJ(X, Y, Z) \
_mm256_or_si256( _mm256_and_si256( X, Y ), \
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
*/

// MAJ (majority) on 256-bit vectors, computed as Y ^ ( (X ^ Y) & (Y ^ Z) ).
//
// This macro is NOT self-contained. It relies on two variables that must
// exist in the scope where it is expanded:
//   Y_xor_Z - read; must already hold Y ^ Z when the macro is used.
//   X_xor_Y - written; receives X ^ Y as a side effect of the expansion.
// The Z argument is accepted for interface compatibility but does not appear
// in the expansion, so a stale Y_xor_Z silently yields a wrong result.
#define MAJ(X, Y, Z) \
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
Y_xor_Z ) )

// Nested rotate/xor chain: ror( ror( ror(x,5) ^ x, 6 ) ^ x, 28 ).
// Assuming mm256_ror_64 is a per-64-bit-element rotate right (its definition
// is not visible here - confirm), rotation distributes over xor and the chain
// equals ror(x,39) ^ ror(x,34) ^ ror(x,28), using three xor-combined rotates
// of partial results instead of three independent rotates of x.
#define BSG5_0(x) \
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
_mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 )

// Nested rotate/xor chain: ror( ror( ror(x,23) ^ x, 4 ) ^ x, 14 ).
// Assuming mm256_ror_64 is a per-64-bit-element rotate right (its definition
// is not visible here - confirm), rotation distributes over xor and the chain
// equals ror(x,41) ^ ror(x,18) ^ ror(x,14).
#define BSG5_1(x) \
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
_mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 )
*/

/*
#define BSG5_0(x) \
_mm256_xor_si256( _mm256_xor_si256( \
Expand Down Expand Up @@ -402,7 +423,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
w1 = _mm256_xor_si256( X1a, X1b ); \
} while(0)
*/

/*
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
__m256i K = _mm256_set1_epi64x( K512[ i ] ); \
Expand Down Expand Up @@ -431,7 +452,7 @@ do { \
H = _mm256_add_epi64( T1, T2 ); \
D = _mm256_add_epi64( D, T1 ); \
} while (0)

*/
/*
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
Expand All @@ -445,24 +466,25 @@ do { \
} while (0)
*/

/*

// One round step of the 4-way SHA-512 compression on 256-bit vectors.
//
// Parameters A..H are the working-state variables for this step; D and H are
// overwritten, the others are only read. i selects the round constant
// K512[i] and the message-schedule word W[i].
//
//   T1 = H + add4( BSG5_1(E), CH(E,F,G), K512[i], W[i] )
//        (mm256_add4_64 presumably sums its four arguments - confirm)
//   T2 = BSG5_0(A) + MAJ(A,B,C)
//   D  = D + T1
//   H  = T1 + T2
//
// Besides its arguments the macro uses W, K512, X_xor_Y and Y_xor_Z from the
// expansion scope. MAJ reads Y_xor_Z (which must equal B ^ C on entry) and
// stores A ^ B into X_xor_Y; that value is then copied into Y_xor_Z for the
// following step. This is only valid if the next step is invoked with the
// state variables rotated so that this step's (A, B) become its (B, C) -
// NOTE(review): verify every call sequence keeps that rotation.
// The copy must stay after the MAJ expansion; do not reorder the statements.
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
__m256i T1, T2; \
__m256i K = _mm256_set1_epi64x( K512[ i ] ); \
T1 = _mm256_add_epi64( H, mm256_add4_64( BSG5_1(E), CH(E, F, G), \
K, W[i] ) ); \
T2 = _mm256_add_epi64( BSG5_0(A), MAJ(A, B, C) ); \
Y_xor_Z = X_xor_Y; \
D = _mm256_add_epi64( D, T1 ); \
H = _mm256_add_epi64( T1, T2 ); \
} while (0)
*/


static void
sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
{
int i;
register __m256i A, B, C, D, E, F, G, H;
register __m256i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
__m256i W[80];

mm256_block_bswap_64( W , in );
Expand Down Expand Up @@ -495,6 +517,8 @@ sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
H = m256_const1_64( 0x5BE0CD19137E2179 );
}

Y_xor_Z = _mm256_xor_si256( B, C );

for ( i = 0; i < 80; i += 8 )
{
SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
Expand Down
4 changes: 2 additions & 2 deletions algo/sha/sph_sha2.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
#endif

// CH (choose): per bit, yields the bit of Y where X is 1 and the bit of Z
// where X is 0. Z is evaluated twice; avoid arguments with side effects.
#define CH(X, Y, Z)   ((Z) ^ ((X) & ((Y) ^ (Z))))
#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))

//#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
// MAJ (majority): a result bit is set when at least two of the corresponding
// bits of X, Y and Z are set. Computed as Y ^ ((X ^ Y) & (Y ^ Z)): when X and
// Y agree the AND term is zero and Y is returned, otherwise the result is Z.
// Every argument is parenthesized so that expression arguments (e.g. a | b)
// are not re-associated by operator precedence. X and Z are evaluated twice
// and Y three times; avoid arguments with side effects.
#define MAJ( X, Y, Z )   ( (Y) ^ ( ( (X) ^ (Y) ) & ( (Y) ^ (Z) ) ) )
#define ROTR SPH_ROTR32

#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
Expand Down
3 changes: 2 additions & 1 deletion algo/sha/sph_sha2big.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
#if SPH_64

// CH (choose): per bit, yields the bit of Y where X is 1 and the bit of Z
// where X is 0. Z is evaluated twice; avoid arguments with side effects.
#define CH(X, Y, Z)   ((Z) ^ ((X) & ((Y) ^ (Z))))
#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
//#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
// MAJ (majority): a result bit is set when at least two of the corresponding
// bits of X, Y and Z are set. Computed as Y ^ ((X ^ Y) & (Y ^ Z)): when X and
// Y agree the AND term is zero and Y is returned, otherwise the result is Z.
// Every argument is parenthesized so that expression arguments (e.g. a | b)
// are not re-associated by operator precedence. X and Z are evaluated twice
// and Y three times; avoid arguments with side effects.
#define MAJ( X, Y, Z )   ( (Y) ^ ( ( (X) ^ (Y) ) & ( (Y) ^ (Z) ) ) )

#define ROTR64 SPH_ROTR64

Expand Down
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.3.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.4.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.16.3'
PACKAGE_STRING='cpuminer-opt 3.16.3'
PACKAGE_VERSION='3.16.4'
PACKAGE_STRING='cpuminer-opt 3.16.4'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.16.3 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.16.4 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1404,7 +1404,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.16.3:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.16.4:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.16.3
cpuminer-opt configure 3.16.4
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.16.3, which was
It was created by cpuminer-opt $as_me 3.16.4, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
Expand Down Expand Up @@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.16.3'
VERSION='3.16.4'
cat >>confdefs.h <<_ACEOF
Expand Down Expand Up @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.16.3, which was
This file was extended by cpuminer-opt $as_me 3.16.4, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.16.3
cpuminer-opt config.status 3.16.4
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.16.3])
AC_INIT([cpuminer-opt], [3.16.4])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
7 changes: 5 additions & 2 deletions cpu-miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,10 @@ static bool work_decode( const json_t *val, struct work *work )

if ( !allow_mininginfo )
net_diff = algo_gate.calc_network_diff( work );
else
net_diff = hash_to_diff( work->target );

work->targetdiff = hash_to_diff( work->target );
work->targetdiff = net_diff;
stratum_diff = last_targetdiff = work->targetdiff;
work->sharediff = 0;
algo_gate.decode_extra_data( work, &net_blocks );
Expand Down Expand Up @@ -908,7 +910,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
}
for ( i = 0; i < ARRAY_SIZE( work->target ); i++ )
work->target[7 - i] = be32dec( target + i );

net_diff = work->targetdiff = hash_to_diff( work->target );

tmp = json_object_get( val, "workid" );
if ( tmp )
{
Expand Down

0 comments on commit a053690

Please sign in to comment.