Skip to content

Commit

Permalink
v3.15.1
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Nov 9, 2020
1 parent c85fb38 commit 4fa8fce
Show file tree
Hide file tree
Showing 18 changed files with 100 additions and 115 deletions.
6 changes: 6 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ If not what makes it happen or not happen?
Change Log
----------

v3.15.1

Fix compile on AMD Zen3 CPUs with VAES; VAES-optimized code is now enabled only when AVX512 is also available.
Force new work immediately after solving a block solo.


v3.15.0

Fugue optimized with AES, improves many sha3 algos.
Expand Down
20 changes: 9 additions & 11 deletions algo/fugue/fugue-aesni.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,11 @@ MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x03020000
MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104};
MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06};
MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E};
MYALIGN const unsigned int _maskd3n[] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000};
MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2};
MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8};
MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5};
MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
MYALIGN const unsigned int _zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000};
MYALIGN const unsigned int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303};
Expand All @@ -61,7 +59,7 @@ MYALIGN const unsigned int _IV512[] = {

#define UNPACK_S0(s0, s1, t1)\
s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\
s0 = _mm_and_si128(s0, M128(_maskd3n))
s0 = mm128_mask_32( s0, 8 )

#define CMIX(s1, s2, r1, r2, t1, t2)\
t1 = s1;\
Expand All @@ -78,7 +76,7 @@ MYALIGN const unsigned int _IV512[] = {
#define UNPACK_S0(s0, s1, t1)\
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\
s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\
s0 = _mm_and_si128(s0, M128(_maskd3n))
s0 = mm128_mask_32( s0, 8 )

#define CMIX(s1, s2, r1, r2, t1, t2)\
t1 = _mm_shuffle_epi32(s1, 0xf9);\
Expand Down Expand Up @@ -138,7 +136,7 @@ MYALIGN const unsigned int _IV512[] = {

#define SUBSTITUTE(r0, _t1, _t2, _t3, _t0)\
_t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\
_t2 = _mm_aesenclast_si128(_t2, M128(_zero))
_t2 = _mm_aesenclast_si128( _t2, m128_zero )

#define SUPERMIX(t0, t1, t2, t3, t4)\
PRESUPERMIX(t0, t1, t2, t3, t4);\
Expand Down Expand Up @@ -181,14 +179,14 @@ MYALIGN const unsigned int _IV512[] = {
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
r2c = _mm_xor_si128(r2c, _t0);\
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
_t0 = mm128_mask_32( _t0, 8 ); \
r2d = _mm_xor_si128(r2d, _t0);\
UNPACK_S0(r1c, r1a, _t3);\
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
r3c = _mm_xor_si128(r3c, _t0);\
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
_t0 = mm128_mask_32( _t0, 8 ); \
r3d = _mm_xor_si128(r3d, _t0);\
UNPACK_S0(r2c, r2a, _t3);\
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
Expand All @@ -203,21 +201,21 @@ MYALIGN const unsigned int _IV512[] = {
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
r2c = _mm_xor_si128(r2c, _t0);\
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
_t0 = mm128_mask_32( _t0, 8 ); \
r2d = _mm_xor_si128(r2d, _t0);\
UNPACK_S0(r1c, r1a, _t3);\
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
r3c = _mm_xor_si128(r3c, _t0);\
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
_t0 = mm128_mask_32( _t0, 8 ); \
r3d = _mm_xor_si128(r3d, _t0);\
UNPACK_S0(r2c, r2a, _t3);\
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
_t0 = _mm_shuffle_epi32(r3c, 0x39);\
r4c = _mm_xor_si128(r4c, _t0);\
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
_t0 = mm128_mask_32( _t0, 8 ); \
r4d = _mm_xor_si128(r4d, _t0);\
UNPACK_S0(r3c, r3a, _t3);\
SUBSTITUTE(r4c, _t1, _t2, _t3, _t0);\
Expand Down Expand Up @@ -462,7 +460,7 @@ HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
ctx->uBlockLength = 4;

for(i = 0; i < 6; i++)
ctx->state[i] = _mm_setzero_si128();
ctx->state[i] = m128_zero;

ctx->state[6] = _mm_load_si128((__m128i*)_IV512 + 0);
ctx->state[7] = _mm_load_si128((__m128i*)_IV512 + 1);
Expand Down
2 changes: 1 addition & 1 deletion algo/fugue/fugue-aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#if defined(__AES__)

#include "algo/sha/sha3_common.h"
#include <x86intrin.h>
#include "simd-utils.h"


typedef struct
Expand Down
4 changes: 2 additions & 2 deletions algo/groestl/groestl256-intr-4way.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
* This code is placed in the public domain
*/


#if !defined(GROESTL256_INTR_4WAY_H__)
#define GROESTL256_INTR_4WAY_H__ 1

#include "groestl256-hash-4way.h"

#if defined(__VAES__)
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
{
{ 0x7060504030201000, 0xffffffffffffffff },
Expand Down
3 changes: 1 addition & 2 deletions algo/groestl/groestl512-intr-4way.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
* This code is placed in the public domain
*/


#if !defined(GROESTL512_INTR_4WAY_H__)
#define GROESTL512_INTR_4WAY_H__ 1

#include "groestl512-hash-4way.h"

#if defined(__VAES__)
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
{
Expand Down
2 changes: 1 addition & 1 deletion algo/lyra2/phi2-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "algo/gost/sph_gost.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "lyra2.h"
#if defined(__VAES__)
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#include "algo/echo/echo-hash-4way.h"
#elif defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
Expand Down
1 change: 0 additions & 1 deletion algo/x13/phi1612.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ void init_phi1612_ctx()
sph_skein512_init( &phi_ctx.skein );
sph_jh512_init( &phi_ctx.jh );
cubehashInit( &phi_ctx.cube, 512, 16, 32 );
sph_fugue512_init( &phi_ctx.fugue );
sph_gost512_init( &phi_ctx.gost );
#ifdef __AES__
init_echo( &phi_ctx.echo, 512 );
Expand Down
3 changes: 3 additions & 0 deletions algo/x16/minotaur.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <stdio.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
//#include "algo/jh/jh-hash-sse2.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
Expand Down Expand Up @@ -49,6 +50,7 @@ struct TortureGarden
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
// jh512_sse2_hashState jh;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
Expand Down Expand Up @@ -125,6 +127,7 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
SHA512_Final( (unsigned char*)hash, &garden->sha512 );
break;
case 8:
// jh512_sse2_full( &garden->jh, hash, input, 64 );
sph_jh512_init(&garden->jh);
sph_jh512(&garden->jh, input, 64);
sph_jh512_close(&garden->jh, hash);
Expand Down
12 changes: 11 additions & 1 deletion build-allarch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# during development. However the information contained may provide compilation
# tips to users.

rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null

make distclean || echo clean
rm -f config.status
Expand Down Expand Up @@ -87,6 +87,16 @@ mv cpuminer.exe cpuminer-zen.exe
strip -s cpuminer
mv cpuminer cpuminer-zen

# Zen3 build: produces cpuminer-zen3 (and cpuminer-zen3.exe).
# Start from a clean tree and drop the cached configure state so the new
# CFLAGS take effect.
make clean || echo done
rm -f config.status
# Built as znver2 with VAES explicitly enabled via -mvaes.
# NOTE(review): the -march=znver3 variant below is kept commented out,
# presumably for compilers that do not yet recognise znver3 -- confirm.
CFLAGS="-O3 -march=znver2 -mvaes -Wall -fno-common" ./configure --with-curl
# CFLAGS="-O3 -march=znver3 -Wall -fno-common" ./configure --with-curl
make -j 8
# Strip and rename both the .exe and the plain binary.
# NOTE(review): presumably only one of the two exists on a given platform,
# so one strip/mv pair is expected to fail harmlessly -- confirm.
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-zen3.exe
strip -s cpuminer
mv cpuminer cpuminer-zen3

make clean || echo done
rm -f config.status
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
Expand Down
7 changes: 3 additions & 4 deletions clean-all.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#!/bin/bash
#
# imake clean and rm all the targetted executables.
# tips to users.
# make clean and rm all the targeted executables.

rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 > /dev/null
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 cpuminer-zen3 > /dev/null

rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42 cpuminer-ssse3 > /dev/null
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-zen3.exe > /dev/null

make distclean > /dev/null
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.0.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.15.0'
PACKAGE_STRING='cpuminer-opt 3.15.0'
PACKAGE_VERSION='3.15.1'
PACKAGE_STRING='cpuminer-opt 3.15.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.15.0 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.15.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1404,7 +1404,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.15.0:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.15.1:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.15.0
cpuminer-opt configure 3.15.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.15.0, which was
It was created by cpuminer-opt $as_me 3.15.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
Expand Down Expand Up @@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.15.0'
VERSION='3.15.1'
cat >>confdefs.h <<_ACEOF
Expand Down Expand Up @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.15.0, which was
This file was extended by cpuminer-opt $as_me 3.15.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.15.0
cpuminer-opt config.status 3.15.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.15.0])
AC_INIT([cpuminer-opt], [3.15.1])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
19 changes: 10 additions & 9 deletions cpu-miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1829,10 +1829,11 @@ bool submit_solution( struct work *work, const void *hash,
update_submit_stats( work, hash );

if unlikely( !have_stratum && !have_longpoll )
{ // block solved, force getwork
{ // solo, block solved, force getwork
pthread_rwlock_wrlock( &g_work_lock );
g_work_time = 0;
pthread_rwlock_unlock( &g_work_lock );
restart_threads();
}

if ( !opt_quiet )
Expand Down Expand Up @@ -1868,25 +1869,25 @@ static bool wanna_mine(int thr_id)
bool state = true;

if (opt_max_temp > 0.0)
{
{
float temp = cpu_temp(0);
if (temp > opt_max_temp)
{
{
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
applog(LOG_INFO, "temperature too high (%.0fC), waiting...", temp);
state = false;
}
}
if (opt_max_diff > 0.0 && net_diff > opt_max_diff)
{
{
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
applog(LOG_INFO, "network diff too high, waiting...");
state = false;
}
if (opt_max_rate > 0.0 && net_hashrate > opt_max_rate)
{
{
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
{
{
char rate[32];
format_hashrate(opt_max_rate, rate);
applog(LOG_INFO, "network hashrate too high, waiting %s...", rate);
Expand All @@ -1903,7 +1904,7 @@ static bool wanna_mine(int thr_id)
// default
void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
{
sha256d(merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size);
sha256d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
for ( int i = 0; i < sctx->job.merkle_count; i++ )
{
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
Expand Down Expand Up @@ -2038,7 +2039,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
{
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
g_work->xnonce2_len );
applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g",
applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g",
xnonce2str, sctx->block_height, net_diff );
free( xnonce2str );
}
Expand Down Expand Up @@ -3509,7 +3510,7 @@ bool check_cpu_capability ()
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes && use_avx512;
use_none = !( use_sse2 || use_aes || use_sse42 || use_avx512 || use_avx2 ||
use_sha || use_vaes );

Expand Down
Loading

0 comments on commit 4fa8fce

Please sign in to comment.