diff --git a/Makefile.am b/Makefile.am index 90d737f5..4da1cee2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -250,6 +250,7 @@ cpuminer_SOURCES = \ algo/x16/x16rt.c \ algo/x16/x16rt-4way.c \ algo/x16/hex.c \ + algo/x16/x20r.c \ algo/x16/x21s-4way.c \ algo/x16/x21s.c \ algo/x16/minotaur.c \ diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 057b305a..3d232b97 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -75,6 +75,11 @@ If not what makes it happen or not happen? Change Log ---------- +v23.13 + +Added x20r algo. +Eliminated redundant hash order calculations for x16r family. + v23.12 Several bugs fixes and speed improvements for x16r family for all CPU architectures. diff --git a/algo-gate-api.c b/algo-gate-api.c index 1886e36c..a456c873 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -368,6 +368,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break; case ALGO_X16S: rc = register_x16s_algo ( gate ); break; case ALGO_X17: rc = register_x17_algo ( gate ); break; + case ALGO_X20R: rc = register_x20r_algo ( gate ); break; case ALGO_X21S: rc = register_x21s_algo ( gate ); break; case ALGO_X22I: rc = register_x22i_algo ( gate ); break; case ALGO_X25X: rc = register_x25x_algo ( gate ); break; diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c index 17870c8a..300d866e 100644 --- a/algo/x16/x16r-4way.c +++ b/algo/x16/x16r-4way.c @@ -19,12 +19,12 @@ // Perform midstate prehash of hash functions with block size <= 72 bytes, // 76 bytes for hash functions that operate on 32 bit data. -void x16r_8way_prehash( void *vdata, void *pdata ) +void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order ) { uint32_t vdata2[20*8] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64))); - const char elem = x16r_hash_order[0]; + const char elem = hash_order[0]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -110,7 +110,8 @@ void x16r_8way_prehash( void *vdata, void *pdata ) // Called by wrapper hash function to optionally continue hashing and // convert to final hash. -int x16r_8way_hash_generic( void* output, const void* input, int thrid ) +int x16r_8way_hash_generic( void* output, const void* input, int thrid, + const char *hash_order, const int func_count ) { uint32_t vhash[20*8] __attribute__ ((aligned (128))); uint32_t hash0[20] __attribute__ ((aligned (16))); @@ -136,9 +137,9 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid ) dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, input, 640 ); - for ( int i = 0; i < 16; i++ ) + for ( int i = 0; i < func_count; i++ ) { - const char elem = x16r_hash_order[i]; + const char elem = hash_order[i]; const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -474,7 +475,8 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid ) int x16r_8way_hash( void* output, const void* input, int thrid ) { uint8_t hash[64*8] __attribute__ ((aligned (128))); - if ( !x16r_8way_hash_generic( hash, input, thrid ) ) + if ( !x16r_8way_hash_generic( hash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; memcpy( output, hash, 32 ); @@ -495,7 +497,6 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, { uint32_t hash[16*8] __attribute__ ((aligned (128))); uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t bedata1[2]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -508,21 +509,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - - if ( opt_debug && !thr_id ) - applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_8way_prehash( vdata, pdata ); + x16r_8way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); @@ -546,12 +544,12 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, #elif defined (X16R_4WAY) -void x16r_4way_prehash( void *vdata, void *pdata ) +void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order ) { uint32_t vdata2[20*4] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64))); - const char elem = x16r_hash_order[0]; + const char elem = hash_order[0]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -627,7 +625,8 @@ void x16r_4way_prehash( void *vdata, void *pdata ) } } -int x16r_4way_hash_generic( void* output, const void* input, int thrid ) +int x16r_4way_hash_generic( void* output, const void* input, int thrid, + const char *hash_order, const int func_count ) { uint32_t vhash[20*4] __attribute__ ((aligned (128))); uint32_t hash0[20] __attribute__ ((aligned (32))); @@ -644,9 +643,9 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid ) dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 ); - for ( int i = 0; i < 16; i++ ) + for ( int i = 0; i < func_count; i++ ) { - const char elem = x16r_hash_order[i]; + const char elem = hash_order[i]; const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -908,7 +907,8 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid ) int x16r_4way_hash( void* output, const void* input, int thrid ) { uint8_t hash[64*4] __attribute__ ((aligned (64))); - if ( !x16r_4way_hash_generic( hash, input, thrid ) ) + if ( !x16r_4way_hash_generic( hash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; memcpy( output, hash, 32 ); @@ -924,7 +924,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, { uint32_t hash[16*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t bedata1[2]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -937,20 +936,18 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - if ( opt_debug && !thr_id ) - applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_4way_prehash( vdata, pdata ); + x16r_4way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do @@ -973,10 +970,10 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, #elif defined (X16R_2WAY) -void x16r_2x64_prehash( void *vdata, void *pdata ) +void x16r_2x64_prehash( void *vdata, void *pdata, const char *hash_order ) { uint32_t edata[20] __attribute__ ((aligned (64))); - const char elem = x16r_hash_order[0]; + const char elem = hash_order[0]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -1051,7 +1048,8 @@ void x16r_2x64_prehash( void *vdata, void *pdata ) } } -int x16r_2x64_hash_generic( void* output, const void* input, int thrid ) +int x16r_2x64_hash_generic( void* output, const void* input, int thrid, + const char *hash_order, const int func_count ) { uint32_t vhash[20*2] __attribute__ ((aligned (64))); uint32_t hash0[20] __attribute__ ((aligned (32))); @@ -1064,9 +1062,9 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid ) dintrlv_2x64( hash0, hash1, input, 640 ); - for ( int i = 0; i < 16; i++ ) + for ( int i = 0; i < func_count; i++ ) { - const char elem = x16r_hash_order[i]; + const char elem = hash_order[i]; const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -1313,7 +1311,8 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid ) int x16r_2x64_hash( void* output, const void* input, int thrid ) { uint8_t hash[64*2] __attribute__ ((aligned (64))); - if ( !x16r_2x64_hash_generic( hash, input, thrid ) ) + if ( !x16r_2x64_hash_generic( hash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; memcpy( output, hash, 32 ); @@ -1327,7 +1326,6 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce, { uint32_t hash[16*2] __attribute__ ((aligned (64))); uint32_t vdata[20*2] __attribute__ ((aligned (64))); - uint32_t bedata1[2]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -1340,20 +1338,18 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - if ( opt_debug && !thr_id ) - applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_2x64_prehash( vdata, pdata ); + x16r_2x64_prehash( vdata, pdata, x16r_hash_order ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); do { diff --git a/algo/x16/x16r-gate.c b/algo/x16/x16r-gate.c index 25dbec11..2414ca64 100644 --- a/algo/x16/x16r-gate.c +++ b/algo/x16/x16r-gate.c @@ -5,15 +5,15 @@ __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 }; void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL; -#if defined (X16R_8WAY) +#if defined(X16R_8WAY) __thread x16r_8way_context_overlay x16r_ctx; -#elif defined (X16R_4WAY) +#elif defined(X16R_4WAY) __thread x16r_4way_context_overlay x16r_ctx; -#elif defined (X16R_2WAY) +#elif defined(X16R_2WAY) __thread x16r_2x64_context_overlay x16r_ctx; @@ -55,13 +55,13 @@ void x16s_getAlgoString( const uint8_t* prevblock, char *output ) bool register_x16r_algo( algo_gate_t* gate ) { -#if defined (X16R_8WAY) +#if defined(X16R_8WAY) gate->scanhash = (void*)&scanhash_x16r_8way; gate->hash = (void*)&x16r_8way_hash; -#elif defined (X16R_4WAY) +#elif defined(X16R_4WAY) gate->scanhash = (void*)&scanhash_x16r_4way; gate->hash = (void*)&x16r_4way_hash; -#elif defined (X16R_2WAY) +#elif defined(X16R_2WAY) gate->scanhash = (void*)&scanhash_x16r_2x64; gate->hash = (void*)&x16r_2x64_hash; #else @@ -77,13 +77,13 @@ bool register_x16r_algo( algo_gate_t* gate ) bool register_x16rv2_algo( algo_gate_t* gate ) { -#if defined (X16RV2_8WAY) +#if defined(X16RV2_8WAY) gate->scanhash = (void*)&scanhash_x16rv2_8way; gate->hash = (void*)&x16rv2_8way_hash; -#elif defined (X16RV2_4WAY) +#elif defined(X16RV2_4WAY) gate->scanhash = (void*)&scanhash_x16rv2_4way; gate->hash = (void*)&x16rv2_4way_hash; -#elif defined (X16RV2_2WAY) +#elif defined(X16RV2_2WAY) gate->scanhash = (void*)&scanhash_x16rv2_2x64; gate->hash = (void*)&x16rv2_2x64_hash; #else @@ -99,13 +99,13 @@ bool register_x16rv2_algo( algo_gate_t* gate ) bool 
register_x16s_algo( algo_gate_t* gate ) { -#if defined (X16R_8WAY) +#if defined(X16R_8WAY) gate->scanhash = (void*)&scanhash_x16r_8way; gate->hash = (void*)&x16r_8way_hash; -#elif defined (X16R_4WAY) +#elif defined(X16R_4WAY) gate->scanhash = (void*)&scanhash_x16r_4way; gate->hash = (void*)&x16r_4way_hash; -#elif defined (X16R_2WAY) +#elif defined(X16R_2WAY) gate->scanhash = (void*)&scanhash_x16r_2x64; gate->hash = (void*)&x16r_2x64_hash; #else @@ -235,13 +235,13 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) bool register_x16rt_algo( algo_gate_t* gate ) { -#if defined (X16RT_8WAY) +#if defined(X16RT_8WAY) gate->scanhash = (void*)&scanhash_x16rt_8way; gate->hash = (void*)&x16r_8way_hash; -#elif defined (X16RT_4WAY) +#elif defined(X16RT_4WAY) gate->scanhash = (void*)&scanhash_x16rt_4way; gate->hash = (void*)&x16r_4way_hash; -#elif defined (X16RT_2WAY) +#elif defined(X16RT_2WAY) gate->scanhash = (void*)&scanhash_x16rt_2x64; gate->hash = (void*)&x16r_2x64_hash; #else @@ -256,13 +256,13 @@ bool register_x16rt_algo( algo_gate_t* gate ) bool register_x16rt_veil_algo( algo_gate_t* gate ) { -#if defined (X16RT_8WAY) +#if defined(X16RT_8WAY) gate->scanhash = (void*)&scanhash_x16rt_8way; gate->hash = (void*)&x16r_8way_hash; -#elif defined (X16RT_4WAY) +#elif defined(X16RT_4WAY) gate->scanhash = (void*)&scanhash_x16rt_4way; gate->hash = (void*)&x16r_4way_hash; -#elif defined (X16RT_2WAY) +#elif defined(X16RT_2WAY) gate->scanhash = (void*)&scanhash_x16rt_2x64; gate->hash = (void*)&x16r_2x64_hash; #else @@ -296,15 +296,15 @@ bool register_hex_algo( algo_gate_t* gate ) bool register_x21s_algo( algo_gate_t* gate ) { -#if defined (X21S_8WAY) +#if defined(X21S_8WAY) gate->scanhash = (void*)&scanhash_x21s_8way; gate->hash = (void*)&x21s_8way_hash; gate->miner_thread_init = (void*)&x21s_8way_thread_init; -#elif defined (X21S_4WAY) +#elif defined(X21S_4WAY) gate->scanhash = (void*)&scanhash_x21s_4way; gate->hash = (void*)&x21s_4way_hash; gate->miner_thread_init = (void*)&x21s_4way_thread_init; -#elif defined (X21S_2WAY) +#elif defined(X21S_2WAY) gate->scanhash = (void*)&scanhash_x21s_2x64; gate->hash = (void*)&x21s_2x64_hash; gate->miner_thread_init = (void*)&x21s_2x64_thread_init; diff --git a/algo/x16/x16r-gate.h b/algo/x16/x16r-gate.h index e26c3981..7302d8eb 100644 --- a/algo/x16/x16r-gate.h +++ b/algo/x16/x16r-gate.h @@ -149,18 +149,23 @@ union _x16r_8way_context_overlay hashState_echo echo; #endif } __attribute__ ((aligned (64))); +#define _x16r_8x64_context_overlay _x16r_8way_context_overlay typedef union _x16r_8way_context_overlay x16r_8way_context_overlay; +#define x16r_8x64_context_overlay x16r_8way_context_overlay extern __thread x16r_8way_context_overlay x16r_ctx; -void x16r_8way_prehash( void *, void * ); -int x16r_8way_hash_generic( void *, const void *, int ); +void x16r_8way_prehash( void *, void *, const char * ); +int x16r_8way_hash_generic( void *, const void *, int, const char*, const int ); int x16r_8way_hash( void *, const void *, int ); int scanhash_x16r_8way( struct work *, uint32_t , uint64_t *, struct thr_info * ); -extern __thread x16r_8way_context_overlay x16r_ctx; +#define x16r_8x64_prehash x16r_8way_prehash +#define x16r_8x64_hash_generic x16r_8way_hash_generic +#define x16r_8x64_hash x16r_8way_hash +#define scanhash_x16r_8x64 scanhash_x16r_8x64 #elif defined(X16R_4WAY) @@ -189,17 +194,23 @@ union _x16r_4way_context_overlay sph_whirlpool_context whirlpool; sha512_4way_context sha512; } __attribute__ ((aligned (64))); +#define 
_x16r_4x64_context_overlay _x16r_4way_context_overlay typedef union _x16r_4way_context_overlay x16r_4way_context_overlay; +#define x16r_4x64_context_overlay x16r_4way_context_overlay extern __thread x16r_4way_context_overlay x16r_ctx; -void x16r_4way_prehash( void *, void * ); -int x16r_4way_hash_generic( void *, const void *, int ); +void x16r_4way_prehash( void *, void *, const char * ); +int x16r_4way_hash_generic( void *, const void *, int, const char*, const int ); int x16r_4way_hash( void *, const void *, int ); int scanhash_x16r_4way( struct work *, uint32_t, uint64_t *, struct thr_info * ); -extern __thread x16r_4way_context_overlay x16r_ctx; + +#define x16r_4x64_prehash x16r_4way_prehash +#define x16r_4x64_hash_generic x16r_4way_hash_generic +#define x16r_4x64_hash x16r_4way_hash +#define scanhash_x16r_4x64 scanhash_x16r_4x64 #elif defined(X16R_2WAY) @@ -241,8 +252,8 @@ union _x16r_2x64_context_overlay typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay; -void x16r_2x64_prehash( void *, void * ); -int x16r_2x64_hash_generic( void *, const void *, int ); +void x16r_2x64_prehash( void *, void *, const char * ); +int x16r_2x64_hash_generic( void *, const void *, int, const char*, const int ); int x16r_2x64_hash( void *, const void *, int ); int scanhash_x16r_2x64( struct work *, uint32_t, uint64_t *, struct thr_info * ); @@ -288,8 +299,8 @@ typedef union _x16r_context_overlay x16r_context_overlay; extern __thread x16r_context_overlay x16r_ref_ctx; -void x16r_prehash( void *, void * ); -int x16r_hash_generic( void *, const void *, int ); +void x16r_prehash( void *, void *, const char * ); +int x16r_hash_generic( void *, const void *, int, const char*, const int ); int x16r_hash( void *, const void *, int ); int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * ); diff --git a/algo/x16/x16r.c b/algo/x16/x16r.c index e9e50fad..d05bda7f 100644 --- a/algo/x16/x16r.c +++ b/algo/x16/x16r.c @@ -10,9 +10,9 @@ #include #include -void x16r_prehash( void *edata, void *pdata ) +void x16r_prehash( void *edata, void *pdata, const char *hash_order ) { - const char elem = x16r_hash_order[0]; + const char elem = hash_order[0]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -52,17 +52,18 @@ void x16r_prehash( void *edata, void *pdata ) } } -int x16r_hash_generic( void* output, const void* input, int thrid ) +int x16r_hash_generic( void* output, const void* input, int thrid, + const char *hash_order, const int func_count ) { - uint32_t _ALIGN(128) hash[16]; + uint32_t _ALIGN(32) hash[16]; x16r_context_overlay ctx; memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) ); void *in = (void*) input; int size = 80; - for ( int i = 0; i < 16; i++ ) + for ( int i = 0; i < func_count; i++ ) { - const char elem = x16r_hash_order[i]; + const char elem = hash_order[i]; const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -196,7 +197,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid ) int x16r_hash( void* output, const void* input, int thrid ) { uint8_t hash[64] __attribute__ ((aligned (64))); - if ( !x16r_hash_generic( hash, input, thrid ) ) + if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; memcpy( output, hash, 32 ); @@ -206,8 +208,8 @@ int x16r_hash( void* output, const void* input, int thrid ) int scanhash_x16r( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(128) hash32[8]; - uint32_t _ALIGN(128) edata[20]; + uint32_t _ALIGN(32) hash32[8]; + uint32_t _ALIGN(32) edata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -229,7 +231,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce, applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); } - x16r_prehash( edata, pdata ); + x16r_prehash( edata, pdata, x16r_hash_order ); do { diff --git a/algo/x16/x16rt-4way.c b/algo/x16/x16rt-4way.c index 445bba35..12b21d49 100644 --- a/algo/x16/x16rt-4way.c +++ b/algo/x16/x16rt-4way.c @@ -30,12 +30,12 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = masked_ntime; - if ( !thr_id ) - applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", - x16r_hash_order, bswap_32( pdata[17] ), timeHash ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "Hash order %s, Ntime %08x", + x16r_hash_order, bswap_32( pdata[17] ) ); } - x16r_8way_prehash( vdata, pdata ); + x16r_8way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); @@ -84,12 +84,12 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce, x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = masked_ntime; - if ( !thr_id ) - applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", - x16r_hash_order, bswap_32( pdata[17] ), timeHash ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "Hash order %s, Ntime %08x", + x16r_hash_order, bswap_32( pdata[17] ) ); } - x16r_4way_prehash( vdata, pdata ); + x16r_4way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do @@ -137,12 +137,12 @@ int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce, x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = masked_ntime; - if ( !thr_id ) - applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", - x16r_hash_order, bswap_32( pdata[17] ), timeHash ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "Hash order %s, Ntime %08x", + x16r_hash_order, bswap_32( pdata[17] ) ); } - x16r_2x64_prehash( vdata, pdata ); + x16r_2x64_prehash( vdata, pdata, x16r_hash_order ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); do { diff --git a/algo/x16/x16rt.c b/algo/x16/x16rt.c index 3b447c51..292c3c9b 100644 --- a/algo/x16/x16rt.c +++ b/algo/x16/x16rt.c @@ -31,7 +31,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce, x16r_hash_order, swab32( pdata[17] ), timeHash ); } - x16r_prehash( edata, pdata ); + x16r_prehash( edata, pdata, x16r_hash_order ); do 
{ diff --git a/algo/x16/x16rv2-4way.c b/algo/x16/x16rv2-4way.c index d0045a7e..c271d2ca 100644 --- a/algo/x16/x16rv2-4way.c +++ b/algo/x16/x16rv2-4way.c @@ -593,7 +593,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce, uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata2[20*8] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64))); - uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -606,19 +605,15 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); if ( !opt_quiet && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } // Do midstate prehash on hash functions with block size <= 64 bytes. @@ -1108,7 +1103,6 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce, uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata32[20*4] __attribute__ ((aligned (64))); uint32_t edata[20]; - uint32_t bedata1[2]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -1121,17 +1115,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0fff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32(pdata[17]); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); if ( !opt_quiet && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } // Do midstate prehash on hash functions with block size <= 64 bytes. 
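The scanhash hunks above (and the matching ones for x16r, x16rv2 and x21s) all make the same change: the hash order is no longer recomputed on every ntime roll, but cached per thread and rebuilt only when work->height changes, with the byte-swapped previous-block-hash words written straight into vdata[1..2] instead of a separate bedata1 buffer. Below is a minimal, self-contained sketch of that caching pattern; compute_order() and update_order_if_new_block() are illustrative stand-ins rather than the miner's actual helpers, and the nibble-to-character mapping is modeled on the x20r_getAlgoString() added later in this patch.

#include <stdint.h>

#define ORDER_LEN 16

/* Stand-in for x16_r_s_getAlgoString(): map the 16 nibbles of the
   big-endian previous-block-hash words to '0'-'9','A'-'F'.
   out must hold ORDER_LEN + 1 bytes. */
static void compute_order( const uint8_t *prevhash, char *out )
{
    for ( int j = 0; j < ORDER_LEN; j++ )
    {
        uint8_t b = (15 - j) >> 1;   /* walk the hex digits in reverse byte order */
        uint8_t d = ( j & 1 ) ? prevhash[b] & 0xF : prevhash[b] >> 4;
        out[j] = ( d < 10 ) ? '0' + d : 'A' + ( d - 10 );
    }
    out[ORDER_LEN] = '\0';
}

/* Rebuild the order only when a new block arrives, mirroring the
   per-thread saved_height test the diff adds to each scanhash function. */
static void update_order_if_new_block( uint32_t height, const uint8_t *prevhash,
                                       char *order )
{
    static __thread uint32_t saved_height = UINT32_MAX;  /* GCC/Clang thread-local, as in the codebase */
    if ( height != saved_height )
    {
        saved_height = height;
        compute_order( prevhash, order );
    }
}

Because the cache key is the block height rather than ntime, the order changes at most once per block; the applog calls were also switched from opt_debug to !opt_quiet, so the chosen order is reported once per block by default.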
@@ -1550,7 +1542,6 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce, uint32_t hash[2*16] __attribute__ ((aligned (64))); uint32_t vdata[24*2] __attribute__ ((aligned (64))); uint32_t edata[20]; - uint32_t bedata1[2]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -1563,17 +1554,15 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0fff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - const uint32_t ntime = bswap_32(pdata[17]); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); if ( !opt_quiet && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } // Do midstate prehash on hash functions with block size <= 64 bytes. diff --git a/algo/x16/x20r.c b/algo/x16/x20r.c new file mode 100644 index 00000000..b6c63966 --- /dev/null +++ b/algo/x16/x20r.c @@ -0,0 +1,362 @@ +#include "miner.h" + +#include +#include +#include + +#include "algo/blake/sph_blake.h" +#include "algo/bmw/sph_bmw.h" +#include "algo/groestl/sph_groestl.h" +#include "algo/jh/sph_jh.h" +#include "algo/keccak/sph_keccak.h" +#include "algo/skein/sph_skein.h" +#include "algo/luffa/sph_luffa.h" +#include "algo/cubehash/sph_cubehash.h" +#include "algo/shavite/sph_shavite.h" +#include "algo/simd/sph_simd.h" +#include "algo/echo/sph_echo.h" +#include "algo/hamsi/sph_hamsi.h" +#include "algo/fugue/sph_fugue.h" +#include "algo/shabal/sph_shabal.h" +#include "algo/whirlpool/sph_whirlpool.h" +#include "algo/sha/sph_sha2.h" +#include "x16r-gate.h" + +#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) + #define X20R_8WAY 1 +#elif defined(__AVX2__) && defined(__AES__) + #define X20R_4WAY 1 +#elif defined(__SSE2__) || defined(__ARM_NEON) + #define X20R_2WAY 1 +#endif + +// X20R is not what it seems. It does not permute 20 functions over 20 rounds, +// it only permutes 16 of them. The last 4 functions are victims of trying to +// fit 20 elements in the space for only 16. Arithmetic overflow recycles the +// first 4 functions. Otherwise it's identical to X16R. +// Welcome to the real X20R. + +#define X20R_HASH_FUNC_COUNT 20 +/* +enum x20r_algo +{ + BLAKE = 0, + BMW, + GROESTL, + JH, + KECCAK, + SKEIN, + LUFFA, + CUBEHASH, + SHAVITE, + SIMD, + ECHO, + HAMSI, + FUGUE, + SHABAL, + WHIRLPOOL, + SHA512, + HAVAL, // Last 4 names are meaningless and not used + GOST, + RADIOGATUN, + PANAMA, + X20R_HASH_FUNC_COUNT +}; +*/ +static __thread char x20r_hash_order[ X20R_HASH_FUNC_COUNT + 1 ] = {0}; + +static void x20r_getAlgoString(const uint8_t* prevblock, char *output) +{ + char *sptr = output; + + for (int j = 0; j < X20R_HASH_FUNC_COUNT; j++) { + uint8_t b = (19 - j) >> 1; // 16 ascii hex chars, reversed + uint8_t algoDigit = (j & 1) ? 
prevblock[b] & 0xF : prevblock[b] >> 4; + if (algoDigit >= 10) + sprintf(sptr, "%c", 'A' + (algoDigit - 10)); + else + sprintf(sptr, "%u", (uint32_t) algoDigit); + sptr++; + } + *sptr = '\0'; +} + +#if defined(X20R_8WAY) + +int x20r_8x64_hash( void* output, const void* input, int thrid ) +{ + uint8_t hash[64*8] __attribute__ ((aligned (128))); + if ( !x16r_8x64_hash_generic( hash, input, thrid, x20r_hash_order, + X20R_HASH_FUNC_COUNT ) ) + return 0; + + memcpy( output, hash, 32 ); + memcpy( output+32, hash+64, 32 ); + memcpy( output+64, hash+128, 32 ); + memcpy( output+96, hash+192, 32 ); + memcpy( output+128, hash+256, 32 ); + memcpy( output+160, hash+320, 32 ); + memcpy( output+192, hash+384, 32 ); + memcpy( output+224, hash+448, 32 ); + + return 1; +} + +int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) +{ + uint32_t hash[16*8] __attribute__ ((aligned (128))); + uint32_t vdata[20*8] __attribute__ ((aligned (64))); + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 8; + uint32_t n = first_nonce; + __m512i *noncev = (__m512i*)vdata + 9; // aligned + const int thr_id = mythr->id; + volatile uint8_t *restart = &(work_restart[thr_id].restart); + const bool bench = opt_benchmark; + + if ( bench ) ptarget[7] = 0x0cff; + + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) + { + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + vdata[3] = bswap_32( pdata[3] ); + saved_height = work->height; + x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x20r_hash_order ); + } + + x16r_8x64_prehash( vdata, pdata, x20r_hash_order ); + *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( + n+7, 0, n+6, 0, n+5, 0, n+4, 0, + n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); + do + { + if( x20r_8x64_hash( hash, vdata, thr_id ) ); + for ( int i = 0; i < 8; i++ ) + if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n+i ); + submit_solution( work, hash+(i<<3), mythr ); + } + *noncev = _mm512_add_epi32( *noncev, + _mm512_set1_epi64( 0x0000000800000000 ) ); + n += 8; + } while ( likely( ( n < last_nonce ) && !(*restart) ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + return 0; +} + + +#elif defined(X20R_4WAY) + +int x20r_4x64_hash( void* output, const void* input, int thrid ) +{ + uint8_t hash[64*4] __attribute__ ((aligned (64))); + if ( !x16r_4x64_hash_generic( hash, input, thrid, x20r_hash_order, + X20R_HASH_FUNC_COUNT ) ) + return 0; + + memcpy( output, hash, 32 ); + memcpy( output+32, hash+64, 32 ); + memcpy( output+64, hash+128, 32 ); + memcpy( output+96, hash+192, 32 ); + + return 1; +} + +int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) +{ + uint32_t hash[16*4] __attribute__ ((aligned (64))); + uint32_t vdata[20*4] __attribute__ ((aligned (64))); + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 4; + uint32_t n = first_nonce; + __m256i *noncev = (__m256i*)vdata + 9; // aligned + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + volatile uint8_t *restart = &(work_restart[thr_id].restart); + + if ( bench ) ptarget[7] = 0x0cff; + + static __thread uint32_t saved_height = UINT32_MAX; + if 
( work->height != saved_height ) + { + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + vdata[3] = bswap_32( pdata[3] ); + saved_height = work->height; + x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x20r_hash_order ); + } + + x16r_4x64_prehash( vdata, pdata, x20r_hash_order ); + *noncev = mm256_intrlv_blend_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); + do + { + if ( x20r_4x64_hash( hash, vdata, thr_id ) ); + for ( int i = 0; i < 4; i++ ) + if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n+i ); + submit_solution( work, hash+(i<<3), mythr ); + } + *noncev = _mm256_add_epi32( *noncev, + _mm256_set1_epi64x( 0x0000000400000000 ) ); + n += 4; + } while ( likely( ( n < last_nonce ) && !(*restart) ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + return 0; +} + +#elif defined(X20R_2WAY) + +int x20r_2x64_hash( void* output, const void* input, int thrid ) +{ + uint8_t hash[64*2] __attribute__ ((aligned (64))); + if ( !x16r_2x64_hash_generic( hash, input, thrid, x20r_hash_order, + X20R_HASH_FUNC_COUNT ) ) + return 0; + + memcpy( output, hash, 32 ); + memcpy( output+32, hash+64, 32 ); + + return 1; +} + +int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) +{ + uint32_t hash[16*2] __attribute__ ((aligned (64))); + uint32_t vdata[20*2] __attribute__ ((aligned (64))); + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 2; + uint32_t n = first_nonce; + v128_t *noncev = (v128_t*)vdata + 9; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + volatile uint8_t *restart = &(work_restart[thr_id].restart); + + if ( bench ) ptarget[7] = 0x0cff; + + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) + { + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + vdata[3] = bswap_32( pdata[3] ); + saved_height = work->height; + x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x20r_hash_order ); + } + + x16r_2x64_prehash( vdata, pdata, x20r_hash_order ); + *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); + do + { + if ( x20r_2x64_hash( hash, vdata, thr_id ) ); + for ( int i = 0; i < 2; i++ ) + if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n+i ); + submit_solution( work, hash+(i<<3), mythr ); + } + *noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) ); + n += 2; + } while ( likely( ( n < last_nonce ) && !(*restart) ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + return 0; +} + +#else + +int x20r_hash( void* output, const void* input, int thrid ) +{ + uint8_t hash[64] __attribute__ ((aligned (64))); + if ( !x16r_hash_generic( hash, input, thrid, x20r_hash_order, + X20R_HASH_FUNC_COUNT ) ) + return 0; + + memcpy( output, hash, 32 ); + return 1; +} + +int scanhash_x20r( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t _ALIGN(32) hash32[8]; + uint32_t _ALIGN(32) edata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const int thr_id = mythr->id; + uint32_t nonce = first_nonce; + volatile uint8_t *restart = &( 
work_restart[thr_id].restart ); + const bool bench = opt_benchmark; + if ( bench ) ptarget[7] = 0x0cff; + + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) + { + edata[1] = bswap_32( pdata[1] ); + edata[2] = bswap_32( pdata[2] ); + edata[3] = bswap_32( pdata[3] ); + saved_height = work->height; + x20r_getAlgoString( (const uint8_t*)(&edata[1]), x20r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x20r_hash_order ); + } + + x16r_prehash( edata, pdata, x20r_hash_order ); + + do + { + edata[19] = nonce; + if ( x20r_hash( hash32, edata, thr_id ) ) + if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( nonce ); + submit_solution( work, hash32, mythr ); + } + nonce++; + } while ( nonce < max_nonce && !(*restart) ); + pdata[19] = nonce; + *hashes_done = pdata[19] - first_nonce; + return 0; +} + +#endif + +bool register_x20r_algo( algo_gate_t* gate ) +{ +#if defined (X20R_8WAY) + gate->scanhash = (void*)&scanhash_x20r_8x64; +#elif defined (X20R_4WAY) + gate->scanhash = (void*)&scanhash_x20r_4x64; +#elif defined (X20R_2WAY) + gate->scanhash = (void*)&scanhash_x20r_2x64; +#else + gate->scanhash = (void*)&scanhash_x20r; +#endif + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT + | NEON_OPT; + opt_target_factor = 256.0; + return true; +}; + diff --git a/algo/x16/x21s-4way.c b/algo/x16/x21s-4way.c index 45b6accd..7a66081b 100644 --- a/algo/x16/x21s-4way.c +++ b/algo/x16/x21s-4way.c @@ -43,7 +43,8 @@ int x21s_8way_hash( void* output, const void* input, int thrid ) uint32_t *hash7 = (uint32_t*)( shash+448 ); x21s_8way_context_overlay ctx; - if ( !x16r_8way_hash_generic( shash, input, thrid ) ) + if ( !x16r_8way_hash_generic( shash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -135,7 +136,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t *hash7 = &hash[7<<3]; uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t Htarg = ptarget[7]; @@ -149,20 +149,18 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - if ( opt_debug && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_8way_prehash( vdata, pdata ); + x16r_8way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); @@ -224,7 +222,8 @@ int x21s_4way_hash( void* output, const void* input, int thrid ) uint32_t *hash2 = (uint32_t*)( shash+128 ); uint32_t *hash3 = (uint32_t*)( shash+192 ); - if ( 
!x16r_4way_hash_generic( shash, input, thrid ) ) + if ( !x16r_4way_hash_generic( shash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); @@ -308,20 +307,18 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - if ( opt_debug && !thr_id ) - applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_4way_prehash( vdata, pdata ); + x16r_4way_prehash( vdata, pdata, x16r_hash_order ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do @@ -372,7 +369,8 @@ int x21s_2x64_hash( void* output, const void* input, int thrid ) uint32_t *hash0 = (uint32_t*) shash; uint32_t *hash1 = (uint32_t*)( shash+64 ); - if ( !x16r_2x64_hash_generic( shash, input, thrid ) ) + if ( !x16r_2x64_hash_generic( shash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; sph_haval256_5_init( &ctx.haval ); @@ -412,7 +410,6 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce, { uint32_t hash[16*2] __attribute__ ((aligned (64))); uint32_t vdata[20*2] __attribute__ ((aligned (64))); - uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; @@ -425,20 +422,18 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; - bedata1[0] = bswap_32( pdata[1] ); - bedata1[1] = bswap_32( pdata[2] ); - - static __thread uint32_t s_ntime = UINT32_MAX; - uint32_t ntime = bswap_32( pdata[17] ); - if ( s_ntime != ntime ) + static __thread uint32_t saved_height = UINT32_MAX; + if ( work->height != saved_height ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); - s_ntime = ntime; - if ( opt_debug && !thr_id ) - applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); + vdata[1] = bswap_32( pdata[1] ); + vdata[2] = bswap_32( pdata[2] ); + saved_height = work->height; + x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order ); + if ( !opt_quiet && !thr_id ) + applog( LOG_INFO, "hash order %s", x16r_hash_order ); } - x16r_2x64_prehash( vdata, pdata ); + x16r_2x64_prehash( vdata, pdata, x16r_hash_order ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); do { diff --git a/algo/x16/x21s.c b/algo/x16/x21s.c index 526c3d43..e9957fd7 100644 --- a/algo/x16/x21s.c +++ b/algo/x16/x21s.c @@ -33,7 +33,8 @@ int x21s_hash( void* output, const void* input, int thrid ) uint32_t _ALIGN(128) hash[16]; x21s_context_overlay ctx; - if ( !x16r_hash_generic( hash, input, thrid ) ) + if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order, + X16R_HASH_FUNC_COUNT ) ) return 0; sph_haval256_5_init( &ctx.haval ); @@ -84,7 +85,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce, applog( LOG_INFO, "hash order %s (%08x)", 
x16r_hash_order, ntime ); } - x16r_prehash( edata, pdata ); + x16r_prehash( edata, pdata, x16r_hash_order ); do { diff --git a/configure b/configure index a9017b1a..8424a26e 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.12. +# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.13. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -608,8 +608,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='23.12' -PACKAGE_STRING='cpuminer-opt 23.12' +PACKAGE_VERSION='23.13' +PACKAGE_STRING='cpuminer-opt 23.13' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1432,7 +1432,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 23.12:";; + short | recursive ) echo "Configuration of cpuminer-opt 23.13:";; esac cat <<\_ACEOF @@ -1538,7 +1538,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 23.12 +cpuminer-opt configure 23.13 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 23.12, which was +It was created by cpuminer-opt $as_me 23.13, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3593,7 +3593,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='23.12' + VERSION='23.13' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 23.12, which was +This file was extended by cpuminer-opt $as_me 23.13, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -cpuminer-opt config.status 23.12 +cpuminer-opt config.status 23.13 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index d397762d..9f839b1e 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [23.12]) +AC_INIT([cpuminer-opt], [23.13]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/configure~ b/configure~ index 2ca5e67e..9f87ae04 100755 --- a/configure~ +++ b/configure~ @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.12. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.13. 
# # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='23.12' -PACKAGE_STRING='cpuminer-opt 23.12' +PACKAGE_VERSION='23.13' +PACKAGE_STRING='cpuminer-opt 23.13' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 23.12:";; + short | recursive ) echo "Configuration of cpuminer-opt 23.13:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 23.12 +cpuminer-opt configure 23.13 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 23.12, which was +It was created by cpuminer-opt $as_me 23.13, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='23.12' + VERSION='23.13' cat >>confdefs.h <<_ACEOF @@ -6718,7 +6718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 23.12, which was +This file was extended by cpuminer-opt $as_me 23.13, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6784,7 +6784,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 23.12 +cpuminer-opt config.status 23.13 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/cpu-miner.c b/cpu-miner.c index 9e5b33c6..6db8dae2 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -2837,15 +2837,6 @@ static void show_credits() #define check_cpu_capability() cpu_capability( false ) #define display_cpu_capability() cpu_capability( true ) -#if defined(__aarch64__) - -#define XSTR(x) STR(x) -#define STR(x) #x - -//#pragma message "Building for armv" XSTR(__ARM_ARCH) - -#endif - static bool cpu_capability( bool display_only ) { char cpu_brand[0x40]; diff --git a/miner.h b/miner.h index cfa54a2d..5d0cebf0 100644 --- a/miner.h +++ b/miner.h @@ -672,6 +672,7 @@ enum algos { ALGO_X16RT_VEIL, ALGO_X16S, ALGO_X17, + ALGO_X20R, ALGO_X21S, ALGO_X22I, ALGO_X25X, @@ -767,6 +768,7 @@ static const char* const algo_names[] = { "x16rt-veil", "x16s", "x17", + "x20r", "x21s", "x22i", "x25x", @@ -930,6 +932,7 @@ Options:\n\ x16rt-veil Veil (VEIL)\n\ x16s\n\ x17\n\ + x20r\n\ x21s\n\ x22i\n\ x25x\n\ diff --git a/simd-utils/intrlv.h b/simd-utils/intrlv.h index dfb9c71b..bcf85dbe 100644 --- a/simd-utils/intrlv.h +++ b/simd-utils/intrlv.h @@ -381,7 +381,7 @@ static inline void dintrlv_4x32_512( void *dst0, void *dst1, void *dst2, d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63]; } -#endif // SSE4_1 else SSE2 or NEON +#endif // SSE4_1 or NEON else SSE2 static inline void extr_lane_4x32( void *d, const void *s, const int lane, const int bit_len ) diff --git a/simd-utils/simd-neon.h b/simd-utils/simd-neon.h index 94df1dce..2e7a4bc5 100644 --- a/simd-utils/simd-neon.h +++ b/simd-utils/simd-neon.h @@ -40,7 +40,7 @@ #define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) ) #define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v ) -// load & set1 combined +// load & set1 combined, doesn't work #define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) ) #define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) ) #define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) ) diff --git a/sysinfos.c b/sysinfos.c index 3e5fcc25..c054a85b 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -930,7 +930,9 @@ static inline void cpu_brand_string( char* s ) #elif defined(__arm__) || defined(__aarch64__) - sprintf( s, "ARM 64 bit CPU" ); + unsigned int cpu_info[4] = { 0 }; + cpuid( 0, 0, cpu_info ); + sprintf( s, "ARM 64 bit CPU, HWCAP %08x", cpu_info[0] ); #else
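One detail worth spelling out from the new algo/x16/x20r.c above: the 20-character order string is read from the first ten bytes of the byte-swapped previous-block hash, and every character is a single hex nibble, so only the sixteen X16R functions ('0'-'9' and 'A'-'F') can ever be selected, which is exactly the point made by the "X20R is not what it seems" comment in that file. The standalone restatement below can be compiled on its own to see the effect; the sample prevhash bytes are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define X20R_FUNC_COUNT 20

/* Same selection logic as x20r_getAlgoString() in algo/x16/x20r.c,
   without the sprintf: 20 hex digits, read in reverse byte order. */
static void x20r_order( const uint8_t *prevblock, char *out )
{
    for ( int j = 0; j < X20R_FUNC_COUNT; j++ )
    {
        uint8_t b = (19 - j) >> 1;
        uint8_t d = ( j & 1 ) ? prevblock[b] & 0xF : prevblock[b] >> 4;
        out[j] = ( d < 10 ) ? '0' + d : 'A' + ( d - 10 );
    }
    out[X20R_FUNC_COUNT] = '\0';
}

int main(void)
{
    /* Hypothetical bswap_32'd previous-block-hash bytes (vdata[1..3] in the scanhash code). */
    const uint8_t prevhash[10] = { 0x01, 0x23, 0x45, 0x67, 0x89,
                                   0xAB, 0xCD, 0xEF, 0x01, 0x23 };
    char order[X20R_FUNC_COUNT + 1];

    x20r_order( prevhash, order );
    printf( "x20r hash order: %s\n", order );   /* prints "2301EFCDAB8967452301" */
    /* Only digits 0-15 are possible, so HAVAL/GOST/RADIOGATUN/PANAMA from the
       commented-out enum are never reached. */
    return 0;
}

This is also why the new code can reuse x16r_8x64/4x64/2x64_hash_generic() unchanged, passing X20R_HASH_FUNC_COUNT as func_count: the dispatch switch never sees a digit outside the X16R range, only a 20-step order instead of a 16-step one.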