Skip to content

Commit

Permalink
v3.18.0
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Sep 29, 2021
1 parent 9b905fc commit 2cd1507
Show file tree
Hide file tree
Showing 80 changed files with 8,102 additions and 2,054 deletions.
3 changes: 3 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,9 @@ cpuminer_SOURCES = \
algo/ripemd/lbry.c \
algo/ripemd/lbry-4way.c \
algo/scrypt/scrypt.c \
algo/scrypt/scrypt-core-4way.c \
algo/scrypt/neoscrypt.c \
algo/sha/sha256-hash.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/sha/sha256-hash-4way.c \
Expand All @@ -167,6 +169,7 @@ cpuminer_SOURCES = \
algo/sha/sha256-hash-2way-ni.c \
algo/sha/hmac-sha256-hash.c \
algo/sha/hmac-sha256-hash-4way.c \
algo/sha/sha256d.c \
algo/sha/sha2.c \
algo/sha/sha256t-gate.c \
algo/sha/sha256t-4way.c \
Expand Down
31 changes: 31 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,37 @@ If not what makes it happen or not happen?
Change Log
----------

v3.18.0

Complete rewrite of Scrypt code, optimized for large N factor (scryptn2):
- AVX512 & SHA support for SHA256, AVX512 has priority,
- up to 50% increase in hashrate,
- memory requirements reduced 30-60% depending on CPU architecture,
- memory usage displayed at startup,
- scrypt with the default N=1024 (LTC) will likely be slower.

Improved stale share detection and handling for Scrypt with large N factor:
- abort and discard partially computed hash when new work is detected,
- quicker response to new job, less time wasted mining stale job.

Improved stale share handling for all algorithms:
- report a possible stale share when new work is received while a previously
submitted share is still pending,
- when new work is detected, report the submission of an already completed,
otherwise valid but likely stale share,
- fixed incorrect block height in stale share log.

Small performance improvements to sha, bmw, cube & hamsi for AVX512 & AVX2.

When stratum disconnects, miner threads go idle until reconnected.

Colour changes to some logs.

Some low level function name changes for clarity and consistency.

The reference hashrate in the summary log and the benchmark total hashrate
are now the mean hashrate for the session.

v3.17.1

Fixed Windows build for AES+SSE4.2 (Westmere), was missing AES.
Expand Down
4 changes: 4 additions & 0 deletions algo-gate-api.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#ifndef __ALGO_GATE_API_H__
#define __ALGO_GATE_API_H__ 1

#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
Expand Down Expand Up @@ -319,3 +322,4 @@ void exec_hash_function( int algo, void *output, const void *pdata );
// algo name if valid alias, NULL if invalid alias or algo.
void get_algo_alias( char **algo_or_alias );

#endif
18 changes: 9 additions & 9 deletions algo/argon2/argon2d/blake2/blamka-round-opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {

#include <immintrin.h>

#define ror64(x, n) _mm512_ror_epi64((x), (n))
#define ROR64(x, n) _mm512_ror_epi64((x), (n))

static __m512i muladd(__m512i x, __m512i y)
{
Expand All @@ -344,17 +344,17 @@ static __m512i muladd(__m512i x, __m512i y)
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
D0 = ROR64(D0, 32); \
D1 = ROR64(D1, 32); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
B0 = ROR64(B0, 24); \
B1 = ROR64(B1, 24); \
} while ((void)0, 0)

#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
Expand All @@ -365,17 +365,17 @@ static __m512i muladd(__m512i x, __m512i y)
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
D0 = ROR64(D0, 16); \
D1 = ROR64(D1, 16); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
B0 = ROR64(B0, 63); \
B1 = ROR64(B1, 63); \
} while ((void)0, 0)

#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
Expand Down
Loading

0 comments on commit 2cd1507

Please sign in to comment.