Skip to content

Commit

Permalink
v3.10.5
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Dec 21, 2019
1 parent a17ff6f commit c65b0ff
Show file tree
Hide file tree
Showing 72 changed files with 9,091 additions and 1,337 deletions.
71 changes: 53 additions & 18 deletions INSTALL_LINUX
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@


Requirements:
1. Requirements:
---------------

Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
64 bit Linux operating system. Apple is not supported.

Building on linux prerequisites:
2. Building on linux prerequisites:
-----------------------------------

It is assumed users know how to install packages on their system and
be able to compile standard source packages. This is basic Linux and
Expand All @@ -20,41 +22,74 @@ http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu

Install any additional dependencies needed by cpuminer-opt. The list below
are some of the ones that may not be in the default install and need to
be installed manually. There may be others, read the error messages they
will give a clue as to the missing package.
be installed manually. There may be others, read the compiler error messages,
they will give a clue as to the missing package.

The following command should install everything you need on Debian based
distributions such as Ubuntu. Fedora and other distributions may have similar
but different package names.
but different package names.

sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev
$ sudo apt-get install build-essential automake libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev git

SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and
openssl 1.1.0e or higher. Add one of the following, depending on the
compiler version, to CFLAGS:
"-march=native" or "-march=znver1" or "-msha".
openssl 1.1.0e or higher. Add one of the following to CFLAGS for SHA
support depending on your CPU and compiler version:

"-march=native" is always the best choice

"-march=znver1" for Ryzen 1000 & 2000 series, znver2 for 3000.

"-msha" Add SHA to other tuning options

Additional instructions for static compilalation can be found here:
https://lxadm.com/Static_compilation_of_cpuminer
Static builds should only considered in a homogeneous HW and SW environment.
Local builds will always have the best performance and compatibility.

Extract cpuminer source.
3. Download cpuminer-opt
------------------------

tar xvzf cpuminer-opt-x.y.z.tar.gz
cd cpuminer-opt-x.y.z
Download the source code for the latest realease from the official repository.

Run ./build.sh to build on Linux or execute the following commands.
https://github.com/JayDDee/cpuminer-opt/releases

./autogen.sh
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make
Extract the source code.

$ tar xvzf cpuminer-opt-x.y.z.tar.gz


Alternatively it can be cloned from git.

Start mining.
$ git clone https://github.com/JayDDee/cpuminer-opt.git

4. Build cpuminer-opt
---------------------

It is recomended to Build with default options, this will usuallly
produce the best results.

$ ./build.sh to build on Linux or execute the following commands.

or

$ ./autogen.sh
$ CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
$ make -j n

n is the number of threads.

5. Start mining.
----------------

$ ./cpuminer -a algo -o url -u username -p password

./cpuminer -a algo -o url -u username -p password

Windows
-------

See also INSTAL_WINDOWS

The following procedure is obsolete and uses an old compiler.

Precompiled Windows binaries are built on a Linux host using Mingw
with a more recent compiler than the following Windows hosted procedure.
Expand Down
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ cpuminer_SOURCES = \
algo/luffa/luffa-hash-2way.c \
algo/lyra2/lyra2.c \
algo/lyra2/sponge.c \
algo/lyra2/sponge-2way.c \
algo/lyra2/lyra2-hash-2way.c \
algo/lyra2/lyra2-gate.c \
algo/lyra2/lyra2rev2.c \
algo/lyra2/lyra2rev2-4way.c \
Expand Down
18 changes: 17 additions & 1 deletion RELEASE_NOTES
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cpuminer-opt is a console program run from the command line using the
keyboard, not the mouse.

See also README.md for list of supported algorithms,

Security warning
----------------

Expand Down Expand Up @@ -31,7 +33,21 @@ not supported. FreeBSD YMMV.
Change Log
----------

v3.10.2
v3.10.5

AVX512 for x17, sonoa, xevan, hmq1725, lyra2rev3, lyra2rev2.
Faster hmq1725 AVX2.

v3.10.4

AVX512 for x16r, x16rv2, x16rt, x16s, x16rt-veil (veil).

v3.10.3

AVX512 for x12, x13, x14, x15.
Fixed x12 AVX2 invalid shares.

v.10.2

AVX512 added for bmw512, c11, phi1612 (phi), qubit, skunk, x11, x11gost (sib).
Fixed c11 AVX2 invalid shares.
Expand Down
8 changes: 4 additions & 4 deletions algo/argon2/argon2d/blake2/blamka-round-opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,10 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {

#include <immintrin.h>

#define rotr32 mm256_swap32_64
#define rotr24 mm256_ror3x8_64
#define rotr16 mm256_ror1x16_64
#define rotr63( x ) mm256_rol_64( x, 1 )
#define rotr32( x ) mm256_ror_64( x, 32 )
#define rotr24( x ) mm256_ror_64( x, 24 )
#define rotr16( x ) mm256_ror_64( x, 16 )
#define rotr63( x ) mm256_rol_64( x, 1 )

//#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
//#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
Expand Down
17 changes: 11 additions & 6 deletions algo/blake/blake-hash-4way.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,22 @@ typedef struct {
// Default 14 rounds
typedef blake_4way_small_context blake256_4way_context;
void blake256_4way_init(void *ctx);
void blake256_4way(void *ctx, const void *data, size_t len);
void blake256_4way_update(void *ctx, const void *data, size_t len);
#define blake256_4way blake256_4way_update
void blake256_4way_close(void *ctx, void *dst);

// 14 rounds, blake, decred
typedef blake_4way_small_context blake256r14_4way_context;
void blake256r14_4way_init(void *cc);
void blake256r14_4way(void *cc, const void *data, size_t len);
void blake256r14_4way_update(void *cc, const void *data, size_t len);
#define blake256r14_4way blake256r14_4way_update
void blake256r14_4way_close(void *cc, void *dst);

// 8 rounds, blakecoin, vanilla
typedef blake_4way_small_context blake256r8_4way_context;
void blake256r8_4way_init(void *cc);
void blake256r8_4way(void *cc, const void *data, size_t len);
void blake256r8_4way_update(void *cc, const void *data, size_t len);
#define blake256r8_4way blake256r8_4way_update
void blake256r8_4way_close(void *cc, void *dst);

#ifdef __AVX2__
Expand All @@ -100,19 +103,21 @@ typedef struct {
// Default 14 rounds
typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc);
void blake256_8way(void *cc, const void *data, size_t len);
void blake256_8way_update(void *cc, const void *data, size_t len);
#define blake256_8way blake256_8way_update
void blake256_8way_close(void *cc, void *dst);

// 14 rounds, blake, decred
typedef blake_8way_small_context blake256r14_8way_context;
void blake256r14_8way_init(void *cc);
void blake256r14_8way(void *cc, const void *data, size_t len);
void blake256r14_8way_update(void *cc, const void *data, size_t len);
void blake256r14_8way_close(void *cc, void *dst);

// 8 rounds, blakecoin, vanilla
typedef blake_8way_small_context blake256r8_8way_context;
void blake256r8_8way_init(void *cc);
void blake256r8_8way(void *cc, const void *data, size_t len);
void blake256r8_8way_update(void *cc, const void *data, size_t len);
#define blake256r8_8way blake256r8_8way_update
void blake256r8_8way_close(void *cc, void *dst);

// Blake-512 4 way
Expand Down
14 changes: 7 additions & 7 deletions algo/blake/blake256-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ do { \
m256_const1_64( 0x082EFA98082EFA98 ) ); \
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
m256_const1_64( 0xEC4E6C89EC4E6C89 ) ); \
shuf_bswap32 = m256_const_64( 0x0c0d0e0f08090a0b, 0x0405060700010203, \
shuf_bswap32 = m256_const_64( 0x1c1d1e1f18191a1b, 0x1415161710111213, \
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
M0 = _mm256_shuffle_epi8( * buf , shuf_bswap32 ); \
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
Expand Down Expand Up @@ -1184,7 +1184,7 @@ blake256_16way_update(void *cc, const void *data, size_t len)
}

void
blake256_16way_close_update(void *cc, void *dst)
blake256_16way_close(void *cc, void *dst)
{
blake32_16way_close(cc, 0, 0, dst, 8);
}
Expand Down Expand Up @@ -1259,7 +1259,7 @@ blake256_8way_init(void *cc)
}

void
blake256_8way(void *cc, const void *data, size_t len)
blake256_8way_update(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
Expand All @@ -1279,7 +1279,7 @@ void blake256r14_4way_init(void *cc)
}

void
blake256r14_4way(void *cc, const void *data, size_t len)
blake256r14_4way_update(void *cc, const void *data, size_t len)
{
blake32_4way(cc, data, len);
}
Expand All @@ -1298,7 +1298,7 @@ void blake256r14_8way_init(void *cc)
}

void
blake256r14_8way(void *cc, const void *data, size_t len)
blake256r14_8way_update(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
Expand All @@ -1318,7 +1318,7 @@ void blake256r8_4way_init(void *cc)
}

void
blake256r8_4way(void *cc, const void *data, size_t len)
blake256r8_4way_update(void *cc, const void *data, size_t len)
{
blake32_4way(cc, data, len);
}
Expand All @@ -1337,7 +1337,7 @@ void blake256r8_8way_init(void *cc)
}

void
blake256r8_8way(void *cc, const void *data, size_t len)
blake256r8_8way_update(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
Expand Down
52 changes: 52 additions & 0 deletions algo/bmw/bmw256-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,57 @@ void compress_small_8way( const __m256i *M, const __m256i H[16],
mm256_xor4( qt[24], qt[25], qt[26], qt[27] ),
mm256_xor4( qt[28], qt[29], qt[30], qt[31] ) ) );

#define DH1L( m, sl, sr, a, b, c ) \
_mm256_add_epi32( \
_mm256_xor_si256( M[m], \
_mm256_xor_si256( _mm256_slli_epi32( xh, sl ), \
_mm256_srli_epi32( qt[a], sr ) ) ), \
_mm256_xor_si256( _mm256_xor_si256( xl, qt[b] ), qt[c] ) )

#define DH1R( m, sl, sr, a, b, c ) \
_mm256_add_epi32( \
_mm256_xor_si256( M[m], \
_mm256_xor_si256( _mm256_srli_epi32( xh, sl ), \
_mm256_slli_epi32( qt[a], sr ) ) ), \
_mm256_xor_si256( _mm256_xor_si256( xl, qt[b] ), qt[c] ) )

#define DH2L( m, rl, sl, h, a, b, c ) \
_mm256_add_epi32( _mm256_add_epi32( \
mm256_rol_32( dH[h], rl ), \
_mm256_xor_si256( _mm256_xor_si256( xh, qt[a] ), M[m] )), \
_mm256_xor_si256( _mm256_slli_epi32( xl, sl ), \
_mm256_xor_si256( qt[b], qt[c] ) ) );

#define DH2R( m, rl, sr, h, a, b, c ) \
_mm256_add_epi32( _mm256_add_epi32( \
mm256_rol_32( dH[h], rl ), \
_mm256_xor_si256( _mm256_xor_si256( xh, qt[a] ), M[m] )), \
_mm256_xor_si256( _mm256_srli_epi32( xl, sr ), \
_mm256_xor_si256( qt[b], qt[c] ) ) );

dH[ 0] = DH1L( 0, 5, 5, 16, 24, 0 );
dH[ 1] = DH1R( 1, 7, 8, 17, 25, 1 );
dH[ 2] = DH1R( 2, 5, 5, 18, 26, 2 );
dH[ 3] = DH1R( 3, 1, 5, 19, 27, 3 );
dH[ 4] = DH1R( 4, 3, 0, 20, 28, 4 );
dH[ 5] = DH1L( 5, 6, 6, 21, 29, 5 );
dH[ 6] = DH1R( 6, 4, 6, 22, 30, 6 );
dH[ 7] = DH1R( 7, 11, 2, 23, 31, 7 );
dH[ 8] = DH2L( 8, 9, 8, 4, 24, 23, 8 );
dH[ 9] = DH2R( 9, 10, 6, 5, 25, 16, 9 );
dH[10] = DH2L( 10, 11, 6, 6, 26, 17, 10 );
dH[11] = DH2L( 11, 12, 4, 7, 27, 18, 11 );
dH[12] = DH2R( 12, 13, 3, 0, 28, 19, 12 );
dH[13] = DH2R( 13, 14, 4, 1, 29, 20, 13 );
dH[14] = DH2R( 14, 15, 7, 2, 30, 21, 14 );
dH[15] = DH2R( 15, 16, 2, 3, 31, 22, 15 );

#undef DH1L
#undef DH1R
#undef DH2L
#undef DH2R

/*
dH[ 0] = _mm256_add_epi32(
_mm256_xor_si256( M[0],
_mm256_xor_si256( _mm256_slli_epi32( xh, 5 ),
Expand Down Expand Up @@ -954,6 +1005,7 @@ void compress_small_8way( const __m256i *M, const __m256i H[16],
_mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
_mm256_xor_si256( _mm256_srli_epi32( xl, 2 ),
_mm256_xor_si256( qt[22], qt[15] ) ) );
*/
}

static const __m256i final_s8[16] =
Expand Down
Loading

0 comments on commit c65b0ff

Please sign in to comment.