v3.10.5

JayDDee · Dec 21, 2019 · c65b0ff · c65b0ff
1 parent a17ff6f
commit c65b0ff
Show file tree

Hide file tree

Showing 72 changed files with 9,091 additions and 1,337 deletions.
diff --git a/INSTALL_LINUX b/INSTALL_LINUX
@@ -1,12 +1,14 @@
 
 
-Requirements:
+1. Requirements:
+---------------
 
 Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
 supported.
 64 bit Linux operating system. Apple is not supported.
 
-Building on linux prerequisites:
+2. Building on linux prerequisites:
+-----------------------------------
 
 It is assumed users know how to install packages on their system and
 be able to compile standard source packages. This is basic Linux and
@@ -20,41 +22,74 @@ http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
 
 Install any additional dependencies needed by cpuminer-opt. The list below
 are some of the ones that may not be in the default install and need to
-be installed manually. There may be others, read the error messages they
-will give a clue as to the missing package.
+be installed manually. There may be others, read the compiler error messages,
+they will give a clue as to the missing package.
 
 The following command should install everything you need on Debian based
 distributions such as Ubuntu. Fedora and other distributions may have similar
-but different package names.
+but different package names. 
 
-sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev
+$ sudo apt-get install build-essential automake libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev git
 
 SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and
-openssl 1.1.0e or higher. Add one of the following, depending on the
-compiler version, to CFLAGS:
-"-march=native" or "-march=znver1" or "-msha".
+openssl 1.1.0e or higher. Add one of the following to CFLAGS for SHA
+support depending on your CPU and compiler version:
+
+"-march=native" is always the best choice
+
+"-march=znver1" for Ryzen 1000 & 2000 series, znver2 for 3000.
+
+"-msha"  Add SHA to other tuning options
 
 Additional instructions for static compilation can be found here:
 https://lxadm.com/Static_compilation_of_cpuminer
 Static builds should only be considered in a homogeneous HW and SW environment.
 Local builds will always have the best performance and compatibility.
 
-Extract cpuminer source.
+3. Download cpuminer-opt
+------------------------
 
-tar xvzf cpuminer-opt-x.y.z.tar.gz
-cd cpuminer-opt-x.y.z
+Download the source code for the latest release from the official repository.
 
-Run ./build.sh to build on Linux or execute the following commands.
+https://github.com/JayDDee/cpuminer-opt/releases
 
-./autogen.sh
-CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
-make
+Extract the source code.
+
+$ tar xvzf cpuminer-opt-x.y.z.tar.gz
+
+
+Alternatively it can be cloned from git.
 
-Start mining.
+$ git clone https://github.com/JayDDee/cpuminer-opt.git
+
+4. Build cpuminer-opt
+---------------------
+
+It is recommended to build with default options; this will usually
+produce the best results.
+
+Run build.sh to build on Linux, or execute the following commands.
+
+$ ./build.sh
+
+or 
+
+$ ./autogen.sh
+$ CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
+$ make -j n
+
+n is the number of threads.
+
+5. Start mining.
+----------------
+
+$ ./cpuminer -a algo -o url -u username -p password
 
-./cpuminer -a algo -o url -u username -p password
 
 Windows
+-------
+
+See also INSTALL_WINDOWS
+
+The following procedure is obsolete and uses an old compiler.
 
 Precompiled Windows binaries are built on a Linux host using Mingw
 with a more recent compiler than the following Windows hosted procedure.

diff --git a/Makefile.am b/Makefile.am
@@ -124,6 +124,8 @@ cpuminer_SOURCES = \
   algo/luffa/luffa-hash-2way.c \
   algo/lyra2/lyra2.c \
   algo/lyra2/sponge.c \
+  algo/lyra2/sponge-2way.c \
+  algo/lyra2/lyra2-hash-2way.c \
   algo/lyra2/lyra2-gate.c \
   algo/lyra2/lyra2rev2.c \
   algo/lyra2/lyra2rev2-4way.c \

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
@@ -1,6 +1,8 @@
 cpuminer-opt is a console program run from the command line using the
 keyboard, not the mouse.
 
+See also README.md for a list of supported algorithms.
+
 Security warning
 ----------------
 
@@ -31,7 +33,21 @@ not supported. FreeBSD YMMV.
 Change Log
 ----------
 
-v3.10.2
+v3.10.5
+
+AVX512 for x17, sonoa, xevan, hmq1725, lyra2rev3, lyra2rev2. 
+Faster hmq1725 AVX2.
+
+v3.10.4
+
+AVX512 for x16r, x16rv2, x16rt, x16s, x16rt-veil (veil).
+
+v3.10.3
+
+AVX512 for x12, x13, x14, x15.
+Fixed x12 AVX2 invalid shares.
+
+v3.10.2
 
 AVX512 added for bmw512, c11, phi1612 (phi), qubit, skunk, x11, x11gost (sib).
 Fixed c11 AVX2 invalid shares.

diff --git a/algo/argon2/argon2d/blake2/blamka-round-opt.h b/algo/argon2/argon2d/blake2/blamka-round-opt.h
@@ -184,10 +184,10 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
 
 #include <immintrin.h>
 
-#define  rotr32  mm256_swap32_64
-#define  rotr24  mm256_ror3x8_64
-#define  rotr16  mm256_ror1x16_64
-#define  rotr63( x ) mm256_rol_64( x, 1 )
+#define  rotr32( x )  mm256_ror_64( x, 32 )
+#define  rotr24( x )  mm256_ror_64( x, 24 )
+#define  rotr16( x )  mm256_ror_64( x, 16 )
+#define  rotr63( x )  mm256_rol_64( x,  1 )
 
 //#define rotr32(x)   _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
 //#define rotr24(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))

diff --git a/algo/blake/blake-hash-4way.h b/algo/blake/blake-hash-4way.h
@@ -70,19 +70,22 @@ typedef struct {
 // Default 14 rounds
 typedef blake_4way_small_context blake256_4way_context;
 void blake256_4way_init(void *ctx);
-void blake256_4way(void *ctx, const void *data, size_t len);
+void blake256_4way_update(void *ctx, const void *data, size_t len);
+#define blake256_4way blake256_4way_update
 void blake256_4way_close(void *ctx, void *dst);
 
 // 14 rounds, blake, decred
 typedef blake_4way_small_context blake256r14_4way_context;
 void blake256r14_4way_init(void *cc);
-void blake256r14_4way(void *cc, const void *data, size_t len);
+void blake256r14_4way_update(void *cc, const void *data, size_t len);
+#define blake256r14_4way blake256r14_4way_update
 void blake256r14_4way_close(void *cc, void *dst);
 
 // 8 rounds, blakecoin, vanilla
 typedef blake_4way_small_context blake256r8_4way_context;
 void blake256r8_4way_init(void *cc);
-void blake256r8_4way(void *cc, const void *data, size_t len);
+void blake256r8_4way_update(void *cc, const void *data, size_t len);
+#define blake256r8_4way blake256r8_4way_update
 void blake256r8_4way_close(void *cc, void *dst);
 
 #ifdef __AVX2__
@@ -100,19 +103,21 @@ typedef struct {
 // Default 14 rounds
 typedef blake_8way_small_context blake256_8way_context;
 void blake256_8way_init(void *cc);
-void blake256_8way(void *cc, const void *data, size_t len);
+void blake256_8way_update(void *cc, const void *data, size_t len);
+#define blake256_8way blake256_8way_update
 void blake256_8way_close(void *cc, void *dst);
 
 // 14 rounds, blake, decred
 typedef blake_8way_small_context blake256r14_8way_context;
 void blake256r14_8way_init(void *cc);
-void blake256r14_8way(void *cc, const void *data, size_t len);
+void blake256r14_8way_update(void *cc, const void *data, size_t len);
 void blake256r14_8way_close(void *cc, void *dst);
 
 // 8 rounds, blakecoin, vanilla
 typedef blake_8way_small_context blake256r8_8way_context;
 void blake256r8_8way_init(void *cc);
-void blake256r8_8way(void *cc, const void *data, size_t len);
+void blake256r8_8way_update(void *cc, const void *data, size_t len);
+#define blake256r8_8way blake256r8_8way_update
 void blake256r8_8way_close(void *cc, void *dst);
 
 // Blake-512 4 way

diff --git a/algo/blake/blake256-hash-4way.c b/algo/blake/blake256-hash-4way.c
@@ -634,7 +634,7 @@ do { \
                               m256_const1_64( 0x082EFA98082EFA98 ) ); \
    VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
                               m256_const1_64( 0xEC4E6C89EC4E6C89 ) ); \
-   shuf_bswap32 = m256_const_64( 0x0c0d0e0f08090a0b, 0x0405060700010203, \
+   shuf_bswap32 = m256_const_64( 0x1c1d1e1f18191a1b, 0x1415161710111213, \
                                  0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
    M0 = _mm256_shuffle_epi8( * buf    , shuf_bswap32 ); \
    M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
@@ -1184,7 +1184,7 @@ blake256_16way_update(void *cc, const void *data, size_t len)
 }
 
 void
-blake256_16way_close_update(void *cc, void *dst)
+blake256_16way_close(void *cc, void *dst)
 {
         blake32_16way_close(cc, 0, 0, dst, 8);
 }
@@ -1259,7 +1259,7 @@ blake256_8way_init(void *cc)
 }
 
 void
-blake256_8way(void *cc, const void *data, size_t len)
+blake256_8way_update(void *cc, const void *data, size_t len)
 {
         blake32_8way(cc, data, len);
 }
@@ -1279,7 +1279,7 @@ void blake256r14_4way_init(void *cc)
 }
 
 void
-blake256r14_4way(void *cc, const void *data, size_t len)
+blake256r14_4way_update(void *cc, const void *data, size_t len)
 {
    blake32_4way(cc, data, len);
 }
@@ -1298,7 +1298,7 @@ void blake256r14_8way_init(void *cc)
 }
 
 void
-blake256r14_8way(void *cc, const void *data, size_t len)
+blake256r14_8way_update(void *cc, const void *data, size_t len)
 {
    blake32_8way(cc, data, len);
 }
@@ -1318,7 +1318,7 @@ void blake256r8_4way_init(void *cc)
 }
 
 void
-blake256r8_4way(void *cc, const void *data, size_t len)
+blake256r8_4way_update(void *cc, const void *data, size_t len)
 {
    blake32_4way(cc, data, len);
 }
@@ -1337,7 +1337,7 @@ void blake256r8_8way_init(void *cc)
 }
 
 void
-blake256r8_8way(void *cc, const void *data, size_t len)
+blake256r8_8way_update(void *cc, const void *data, size_t len)
 {
    blake32_8way(cc, data, len);
 }

diff --git a/algo/bmw/bmw256-hash-4way.c b/algo/bmw/bmw256-hash-4way.c
@@ -874,6 +874,57 @@ void compress_small_8way( const __m256i *M, const __m256i H[16],
                  mm256_xor4( qt[24], qt[25], qt[26], qt[27] ),
                  mm256_xor4( qt[28], qt[29], qt[30], qt[31] ) ) );
 
+#define DH1L( m, sl, sr, a, b, c ) \
+   _mm256_add_epi32( \
+               _mm256_xor_si256( M[m], \
+                  _mm256_xor_si256( _mm256_slli_epi32( xh, sl ), \
+                                    _mm256_srli_epi32( qt[a], sr ) ) ), \
+               _mm256_xor_si256( _mm256_xor_si256( xl, qt[b] ), qt[c] ) )
+
+#define DH1R( m, sl, sr, a, b, c ) \
+   _mm256_add_epi32( \
+               _mm256_xor_si256( M[m], \
+                  _mm256_xor_si256( _mm256_srli_epi32( xh, sl ), \
+                                    _mm256_slli_epi32( qt[a], sr ) ) ), \
+               _mm256_xor_si256( _mm256_xor_si256( xl, qt[b] ), qt[c] ) )
+
+#define DH2L( m, rl, sl, h, a, b, c ) \
+   _mm256_add_epi32( _mm256_add_epi32( \
+       mm256_rol_32( dH[h], rl ), \
+          _mm256_xor_si256( _mm256_xor_si256( xh, qt[a] ), M[m] )), \
+                 _mm256_xor_si256( _mm256_slli_epi32( xl, sl ), \
+                                   _mm256_xor_si256( qt[b], qt[c] ) ) );
+
+#define DH2R( m, rl, sr, h, a, b, c ) \
+   _mm256_add_epi32( _mm256_add_epi32( \
+       mm256_rol_32( dH[h], rl ), \
+          _mm256_xor_si256( _mm256_xor_si256( xh, qt[a] ), M[m] )), \
+                 _mm256_xor_si256( _mm256_srli_epi32( xl, sr ), \
+                                   _mm256_xor_si256( qt[b], qt[c] ) ) );
+
+   dH[ 0] = DH1L(  0,  5,  5, 16, 24, 0 );
+   dH[ 1] = DH1R(  1,  7,  8, 17, 25, 1 );
+   dH[ 2] = DH1R(  2,  5,  5, 18, 26, 2 );
+   dH[ 3] = DH1R(  3,  1,  5, 19, 27, 3 );
+   dH[ 4] = DH1R(  4,  3,  0, 20, 28, 4 );
+   dH[ 5] = DH1L(  5,  6,  6, 21, 29, 5 );
+   dH[ 6] = DH1R(  6,  4,  6, 22, 30, 6 );
+   dH[ 7] = DH1R(  7, 11,  2, 23, 31, 7 );
+   dH[ 8] = DH2L(  8,  9,  8,  4, 24, 23,  8 );
+   dH[ 9] = DH2R(  9, 10,  6,  5, 25, 16,  9 );
+   dH[10] = DH2L( 10, 11,  6,  6, 26, 17, 10 );
+   dH[11] = DH2L( 11, 12,  4,  7, 27, 18, 11 );
+   dH[12] = DH2R( 12, 13,  3,  0, 28, 19, 12 );
+   dH[13] = DH2R( 13, 14,  4,  1, 29, 20, 13 );
+   dH[14] = DH2R( 14, 15,  7,  2, 30, 21, 14 );
+   dH[15] = DH2R( 15, 16,  2,  3, 31, 22, 15 );
+
+#undef DH1L
+#undef DH1R
+#undef DH2L
+#undef DH2R
+
+/*   
    dH[ 0] = _mm256_add_epi32(
                  _mm256_xor_si256( M[0],
                       _mm256_xor_si256( _mm256_slli_epi32( xh, 5 ),
@@ -954,6 +1005,7 @@ void compress_small_8way( const __m256i *M, const __m256i H[16],
                  _mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
                  _mm256_xor_si256( _mm256_srli_epi32( xl, 2 ),
                                    _mm256_xor_si256( qt[22], qt[15] ) ) );
+*/
 }
 
 static const __m256i final_s8[16] =