Commit v3.16.2
JayDDee committed Apr 8, 2021
1 parent 902ec04 commit f3333b0
Showing 17 changed files with 827 additions and 337 deletions.
1 change: 1 addition & 0 deletions Makefile.am
@@ -196,6 +196,7 @@ cpuminer_SOURCES = \
algo/verthash/Verthash.c \
algo/verthash/fopen_utf8.c \
algo/verthash/tiny_sha3/sha3.c \
algo/verthash/tiny_sha3/sha3-4way.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
2 changes: 1 addition & 1 deletion README.md
@@ -135,7 +135,7 @@ Supported Algorithms
x14 X14
x15 X15
x16r
x16rv2 Ravencoin (RVN)
x16rv2
x16rt Gincoin (GIN)
x16rt-veil Veil (VEIL)
x16s Pigeoncoin (PGN)
12 changes: 8 additions & 4 deletions RELEASE_NOTES
@@ -65,23 +65,27 @@ If not what makes it happen or not happen?
Change Log
----------

v3.16.2

Verthash: midstate prehash optimization for all architectures.
Verthash: AVX2 optimization.
GBT: added support for Bech32 addresses, untested.
Linux: added CPU frequency to benchmark log.
Fixed integer overflow in time calculations.
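
The "midstate prehash" entry above refers to caching the work over the 72
header bytes that stay constant for a job, so each nonce only repeats the
part that depends on the last 8 bytes (see the new
verthash_sha3_512_prehash_72 / verthash_sha3_512_final_8 prototypes added to
Verthash.h below). A minimal, self-contained sketch of the same caching idea,
illustrated with the FNV-1a fold this codebase already uses rather than with
SHA3; everything here is illustrative only, not miner code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// FNV-1a folds one byte at a time, so the state reached after the constant
// prefix can be cached once (the "midstate") and reused for every suffix.
static uint32_t fnv1a_update( uint32_t h, const uint8_t *p, size_t n )
{
   for ( size_t i = 0; i < n; i++ )  h = ( h ^ p[i] ) * 0x1000193u;
   return h;
}

int main(void)
{
   uint8_t header[80] = {0};   // 72 constant bytes followed by the 8-byte nonce word

   // Prehash: done once per job over the constant 72 bytes.
   const uint32_t midstate = fnv1a_update( 0x811c9dc5u, header, 72 );

   // Final: done per nonce, folding in only the 8 changing bytes.
   for ( uint64_t nonce = 0; nonce < 4; nonce++ )
   {
      memcpy( header + 72, &nonce, 8 );
      uint32_t h = fnv1a_update( midstate, header + 72, 8 );
      printf( "nonce %llu -> %08x\n", (unsigned long long)nonce, h );
   }
   return 0;
}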

v3.16.1

New options for verthash:
--data-file to specify the name, and optionally the path, of the verthash
data file; the default is "verthash.dat" in the current directory.
--verify to perform the data file integrity check at startup; the default
is not to verify data file integrity.

A default verthash data file is created if:
1) the --data-file option is not used,
2) no default data file is found in the current directory, and
3) the --verify option is used.

More detailed logs related to verthash data file.

Small verthash performance improvement.

Fixed detection of corrupt stats caused by networking issues.

v3.16.0
136 changes: 83 additions & 53 deletions algo/verthash/Verthash.c
@@ -134,87 +134,117 @@ static inline uint32_t fnv1a(const uint32_t a, const uint32_t b)
return (a ^ b) * 0x1000193;
}

void verthash_hash(const unsigned char* blob_bytes,
const size_t blob_size,
const unsigned char(*input)[VH_HEADER_SIZE],
unsigned char(*output)[VH_HASH_OUT_SIZE])
void verthash_hash( const unsigned char* blob_bytes,
const size_t blob_size,
const unsigned char(*input)[VH_HEADER_SIZE],
unsigned char(*output)[VH_HASH_OUT_SIZE] )
{
unsigned char p1[VH_HASH_OUT_SIZE] __attribute__ ((aligned (64)));
sha3(&input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE);

unsigned char p0[VH_N_SUBSET];

unsigned char input_header[VH_HEADER_SIZE] __attribute__ ((aligned (64)));
memcpy(input_header, input, VH_HEADER_SIZE);

for (size_t i = 0; i < VH_N_ITER; ++i)
{
input_header[0] += 1;
sha3(&input_header[0], VH_HEADER_SIZE, p0 + i * VH_P0_SIZE, VH_P0_SIZE);
}

uint32_t* p0_index = (uint32_t*)p0;
unsigned char p1[ VH_HASH_OUT_SIZE ] __attribute__ ((aligned (64)));
unsigned char p0[ VH_N_SUBSET ] __attribute__ ((aligned (64)));
uint32_t seek_indexes[VH_N_INDEXES] __attribute__ ((aligned (64)));
uint32_t* p0_index = (uint32_t*)p0;

verthash_sha3_512_final_8( p0, ( (uint64_t*)input )[ 9 ] );

for ( size_t x = 0; x < VH_N_ROT; ++x )
{
memcpy( seek_indexes + x * (VH_N_SUBSET / sizeof(uint32_t)),
p0, VH_N_SUBSET);

//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// 512 bit vector processing is actually slower because it reduces the CPU
// clock significantly, which also slows mem access. The AVX512 rol instruction
// is still available for smaller vectors.

// for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 16 )
// {
// __m512i *p0_v = (__m512i*)( p0_index + y );
// *p0_v = mm512_rol_32( *p0_v, 1 );
// }

#if defined(__AVX2__)

for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 8 )
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m256i); y += 8)
{
__m256i *p0_v = (__m256i*)( p0_index + y );
*p0_v = mm256_rol_32( *p0_v, 1 );
casti_m256i( p0_index, y ) = mm256_rol_32(
casti_m256i( p0_index, y ), 1 );
casti_m256i( p0_index, y+1 ) = mm256_rol_32(
casti_m256i( p0_index, y+1 ), 1 );
casti_m256i( p0_index, y+2 ) = mm256_rol_32(
casti_m256i( p0_index, y+2 ), 1 );
casti_m256i( p0_index, y+3 ) = mm256_rol_32(
casti_m256i( p0_index, y+3 ), 1 );
casti_m256i( p0_index, y+4 ) = mm256_rol_32(
casti_m256i( p0_index, y+4 ), 1 );
casti_m256i( p0_index, y+5 ) = mm256_rol_32(
casti_m256i( p0_index, y+5 ), 1 );
casti_m256i( p0_index, y+6 ) = mm256_rol_32(
casti_m256i( p0_index, y+6 ), 1 );
casti_m256i( p0_index, y+7 ) = mm256_rol_32(
casti_m256i( p0_index, y+7 ), 1 );
}

#else

for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 4 )
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m128i); y += 8)
{
__m128i *p0_v = (__m128i*)( p0_index + y );
*p0_v = mm128_rol_32( *p0_v, 1 );
casti_m128i( p0_index, y ) = mm128_rol_32(
casti_m128i( p0_index, y ), 1 );
casti_m128i( p0_index, y+1 ) = mm128_rol_32(
casti_m128i( p0_index, y+1 ), 1 );
casti_m128i( p0_index, y+2 ) = mm128_rol_32(
casti_m128i( p0_index, y+2 ), 1 );
casti_m128i( p0_index, y+3 ) = mm128_rol_32(
casti_m128i( p0_index, y+3 ), 1 );
casti_m128i( p0_index, y+4 ) = mm128_rol_32(
casti_m128i( p0_index, y+4 ), 1 );
casti_m128i( p0_index, y+5 ) = mm128_rol_32(
casti_m128i( p0_index, y+5 ), 1 );
casti_m128i( p0_index, y+6 ) = mm128_rol_32(
casti_m128i( p0_index, y+6 ), 1 );
casti_m128i( p0_index, y+7 ) = mm128_rol_32(
casti_m128i( p0_index, y+7 ), 1 );
}

#endif

// for (size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); ++y)
// {
// *(p0_index + y) = ( *(p0_index + y) << 1 )
// | ( 1 & (*(p0_index + y) >> 31) );
// }
}

sha3( &input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE );

uint32_t* p1_32 = (uint32_t*)p1;
uint32_t* blob_bytes_32 = (uint32_t*)blob_bytes;
uint32_t value_accumulator = 0x811c9dc5;
const uint32_t mdiv = ((blob_size - VH_HASH_OUT_SIZE) / VH_BYTE_ALIGNMENT) + 1;
for (size_t i = 0; i < VH_N_INDEXES; i++)
const uint32_t mdiv = ( ( blob_size - VH_HASH_OUT_SIZE )
/ VH_BYTE_ALIGNMENT ) + 1;
#if defined (__AVX2__)
const __m256i k = _mm256_set1_epi32( 0x1000193 );
#elif defined(__SSE41__)
const __m128i k = _mm_set1_epi32( 0x1000193 );
#endif

for ( size_t i = 0; i < VH_N_INDEXES; i++ )
{
const uint32_t offset = (fnv1a(seek_indexes[i], value_accumulator) % mdiv) * VH_BYTE_ALIGNMENT / sizeof(uint32_t);
const uint32_t offset =
( fnv1a( seek_indexes[i], value_accumulator) % mdiv )
* ( VH_BYTE_ALIGNMENT / sizeof(uint32_t) );
const uint32_t *blob_off = blob_bytes_32 + offset;
for (size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++)
{
const uint32_t value = *( blob_off + i2 );
uint32_t* p1_ptr = p1_32 + i2;
*p1_ptr = fnv1a( *p1_ptr, value );
value_accumulator = fnv1a( value_accumulator, value );
}

// update value accumulator for next seek index
value_accumulator = fnv1a( value_accumulator, blob_off[0] );
value_accumulator = fnv1a( value_accumulator, blob_off[1] );
value_accumulator = fnv1a( value_accumulator, blob_off[2] );
value_accumulator = fnv1a( value_accumulator, blob_off[3] );
value_accumulator = fnv1a( value_accumulator, blob_off[4] );
value_accumulator = fnv1a( value_accumulator, blob_off[5] );
value_accumulator = fnv1a( value_accumulator, blob_off[6] );
value_accumulator = fnv1a( value_accumulator, blob_off[7] );

#if defined (__AVX2__)
*(__m256i*)p1_32 = _mm256_mullo_epi32( _mm256_xor_si256(
*(__m256i*)p1_32, *(__m256i*)blob_off ), k );
#elif defined(__SSE41__)
casti_m128i( p1_32, 0 ) = _mm_mullo_epi32( _mm_xor_si128(
casti_m128i( p1_32, 0 ), casti_m128i( blob_off, 0 ) ), k );
casti_m128i( p1_32, 1 ) = _mm_mullo_epi32( _mm_xor_si128(
casti_m128i( p1_32, 1 ), casti_m128i( blob_off, 1 ) ), k );
#else
for ( size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++ )
p1_32[i2] = fnv1a( p1_32[i2], blob_off[i2] );
#endif

}

memcpy(output, p1, VH_HASH_OUT_SIZE);
memcpy( output, p1, VH_HASH_OUT_SIZE );
}

//-----------------------------------------------------------------------------
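
The AVX2 optimization in this file has two parts: the rotate-left-by-1 over
the p0 index words is done 256 bits at a time with mm256_rol_32, and the
per-lane fnv1a fold of the 32-byte p1 accumulator becomes one XOR plus one
32-bit low multiply against a broadcast 0x1000193. Below is a small,
self-contained check that the vector forms match the scalar reference lane
for lane; mm256_rol_32 is assumed to be a plain 32-bit rotate and is emulated
with shifts so nothing beyond immintrin.h is needed. Compile with -mavx2.
This is a verification sketch only, not part of the miner.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <immintrin.h>

static inline uint32_t fnv1a( uint32_t a, uint32_t b )
{  return ( a ^ b ) * 0x1000193u;  }

// Assumed behaviour of mm256_rol_32: a plain per-lane rotate left.
static inline __m256i rol32x8( __m256i x, int c )
{  return _mm256_or_si256( _mm256_slli_epi32( x, c ),
                           _mm256_srli_epi32( x, 32 - c ) );  }

int main(void)
{
   uint32_t a[8] = { 0x80000001u, 2, 3, 4, 5, 6, 7, 8 };
   uint32_t b[8] = { 9, 10, 11, 12, 13, 14, 15, 16 };
   uint32_t ref_rol[8], ref_fnv[8], out[8];

   for ( int i = 0; i < 8; i++ )
   {
      ref_rol[i] = ( a[i] << 1 ) | ( a[i] >> 31 );   // scalar rotate by 1
      ref_fnv[i] = fnv1a( a[i], b[i] );              // scalar fnv1a fold
   }

   const __m256i va = _mm256_loadu_si256( (const __m256i*)a );
   const __m256i vb = _mm256_loadu_si256( (const __m256i*)b );
   const __m256i k  = _mm256_set1_epi32( 0x1000193 );

   _mm256_storeu_si256( (__m256i*)out, rol32x8( va, 1 ) );
   printf( "rol:   %s\n", memcmp( ref_rol, out, 32 ) ? "mismatch" : "match" );

   // XOR then 32-bit low multiply equals fnv1a on each of the 8 lanes.
   _mm256_storeu_si256( (__m256i*)out,
                        _mm256_mullo_epi32( _mm256_xor_si256( va, vb ), k ) );
   printf( "fnv1a: %s\n", memcmp( ref_fnv, out, 32 ) ? "mismatch" : "match" );
   return 0;
}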
2 changes: 2 additions & 0 deletions algo/verthash/Verthash.h
@@ -52,6 +52,8 @@ void verthash_hash(const unsigned char* blob_bytes,
const unsigned char(*input)[VH_HEADER_SIZE],
unsigned char(*output)[VH_HASH_OUT_SIZE]);

void verthash_sha3_512_prehash_72( const void *input );
void verthash_sha3_512_final_8( void *hash, const uint64_t nonce );

#endif // !Verthash_INCLUDE_ONCE
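
For reference, a compile-only sketch of how these two new entry points appear
intended to be used, inferred from the call
verthash_sha3_512_final_8( p0, ((uint64_t*)input)[9] ) inside verthash_hash()
above: the 72 constant header bytes are prehashed once per job, then
verthash_hash() is called per nonce. The driver function, the nonce offset of
76, and the buffer handling are assumptions for illustration, not the miner's
actual scanhash code.

#include <stdint.h>
#include <string.h>
#include "Verthash.h"   // prototypes and the VH_* size constants

// Hypothetical per-thread scan loop; links against Verthash.c / tiny_sha3.
void scan_sketch( const unsigned char *blob, size_t blob_size,
                  unsigned char header[VH_HEADER_SIZE],
                  uint32_t first_nonce, uint32_t last_nonce )
{
   unsigned char hash[VH_HASH_OUT_SIZE];

   // Once per job: absorb the 72 header bytes that do not change.
   verthash_sha3_512_prehash_72( header );

   for ( uint32_t n = first_nonce; n <= last_nonce; n++ )
   {
      // Per nonce: only the header tail changes (nonce assumed at byte
      // offset 76); verthash_hash() finishes the SHA3 from the midstate.
      memcpy( header + 76, &n, 4 );
      verthash_hash( blob, blob_size,
                     (const unsigned char(*)[VH_HEADER_SIZE])header,
                     (unsigned char(*)[VH_HASH_OUT_SIZE])hash );
      // ... compare hash against the target here ...
   }
}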
