v3.18.0

JayDDee · Sep 29, 2021 · 2cd1507 · 2cd1507
1 parent 9b905fc
commit 2cd1507
Show file tree

Hide file tree

Showing 80 changed files with 8,102 additions and 2,054 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -158,7 +158,9 @@ cpuminer_SOURCES = \
   algo/ripemd/lbry.c \
   algo/ripemd/lbry-4way.c \
   algo/scrypt/scrypt.c \
+  algo/scrypt/scrypt-core-4way.c \
   algo/scrypt/neoscrypt.c \
+  algo/sha/sha256-hash.c \
   algo/sha/sph_sha2.c \
   algo/sha/sph_sha2big.c \
   algo/sha/sha256-hash-4way.c \
@@ -167,6 +169,7 @@ cpuminer_SOURCES = \
   algo/sha/sha256-hash-2way-ni.c \
   algo/sha/hmac-sha256-hash.c \
   algo/sha/hmac-sha256-hash-4way.c \
+  algo/sha/sha256d.c \
   algo/sha/sha2.c \
   algo/sha/sha256t-gate.c \
   algo/sha/sha256t-4way.c \

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
@@ -65,6 +65,37 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.18.0
+
+Complete rewrite of Scrypt code, optimized for large N factor (scryptn2):
+  - AVX512 & SHA support for SHA256, AVX512 has priority,
+  - up to 50% increase in hashrate,
+  - memory requirements reduced 30-60% depending on CPU architecture,
+  - memory usage displayed at startup,
+  - scrypt, default N=1024 (LTC), will likely perform slower.
+
+Improved stale share detection and handling for Scrypt with large N factor:
+  - abort and discard partially computed hash when new work is detected,
+  - quicker response to new job, less time wasted mining stale job.
+
+Improved stale share handling for all algorithms:
+  - report a possible stale share when new work is received while a
+    previously submitted share is still pending,
+  - when new work is detected, report the submission of an already completed
+    share that is otherwise valid but likely stale,
+  - fixed incorrect block height in stale share log.
+
+Small performance improvements to sha, bmw, cube & hamsi for AVX512 & AVX2.
+
+When stratum disconnects, miner threads go idle until reconnected.
+
+Colour changes to some logs.
+
+Some low level function name changes for clarity and consistency.
+
+The reference hashrate in the summary log and the benchmark total hashrate
+are now the mean hashrate for the session. 
+
 v3.17.1
 
 Fixed Windows build for AES+SSE4.2 (Westmere), was missing AES.

diff --git a/algo-gate-api.h b/algo-gate-api.h
@@ -1,3 +1,6 @@
+#ifndef __ALGO_GATE_API_H__
+#define __ALGO_GATE_API_H__ 1
+
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -319,3 +322,4 @@ void exec_hash_function( int algo, void *output, const void *pdata );
 // algo name if valid alias, NULL if invalid alias or algo.
 void get_algo_alias( char **algo_or_alias );
 
+#endif
diff --git a/algo/argon2/argon2d/blake2/blamka-round-opt.h b/algo/argon2/argon2d/blake2/blamka-round-opt.h
@@ -328,7 +328,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
 
 #include <immintrin.h>
 
-#define ror64(x, n) _mm512_ror_epi64((x), (n))
+#define ROR64(x, n) _mm512_ror_epi64((x), (n))
 
 static __m512i muladd(__m512i x, __m512i y)
 {
@@ -344,17 +344,17 @@ static __m512i muladd(__m512i x, __m512i y)
         D0 = _mm512_xor_si512(D0, A0); \
         D1 = _mm512_xor_si512(D1, A1); \
 \
-        D0 = ror64(D0, 32); \
-        D1 = ror64(D1, 32); \
+        D0 = ROR64(D0, 32); \
+        D1 = ROR64(D1, 32); \
 \
         C0 = muladd(C0, D0); \
         C1 = muladd(C1, D1); \
 \
         B0 = _mm512_xor_si512(B0, C0); \
         B1 = _mm512_xor_si512(B1, C1); \
 \
-        B0 = ror64(B0, 24); \
-        B1 = ror64(B1, 24); \
+        B0 = ROR64(B0, 24); \
+        B1 = ROR64(B1, 24); \
     } while ((void)0, 0)
 
 #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
@@ -365,17 +365,17 @@ static __m512i muladd(__m512i x, __m512i y)
         D0 = _mm512_xor_si512(D0, A0); \
         D1 = _mm512_xor_si512(D1, A1); \
 \
-        D0 = ror64(D0, 16); \
-        D1 = ror64(D1, 16); \
+        D0 = ROR64(D0, 16); \
+        D1 = ROR64(D1, 16); \
 \
         C0 = muladd(C0, D0); \
         C1 = muladd(C1, D1); \
 \
         B0 = _mm512_xor_si512(B0, C0); \
         B1 = _mm512_xor_si512(B1, C1); \
 \
-        B0 = ror64(B0, 63); \
-        B1 = ror64(B1, 63); \
+        B0 = ROR64(B0, 63); \
+        B1 = ROR64(B1, 63); \
     } while ((void)0, 0)
 
 #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \