From 0d48d573ce5add60dcfa3cdc21e3adffcfc8fcfb Mon Sep 17 00:00:00 2001 From: Jay D Dee Date: Wed, 26 Jun 2019 14:16:01 -0400 Subject: [PATCH] v3.9.5 --- Makefile.am | 7 +- README.md | 8 +- RELEASE_NOTES | 8 + algo-gate-api.c | 220 ++++++++------- algo-gate-api.h | 10 +- algo/argon2/argon2a/argon2a.c | 4 +- algo/argon2/argon2d/argon2d-gate.c | 16 +- algo/argon2/argon2d/argon2d-gate.h | 12 +- algo/blake/blake-4way.c | 43 ++- algo/blake/blake-gate.h | 8 +- algo/blake/blake.c | 5 +- algo/blake/blake2b.c | 5 +- algo/blake/blake2s-4way.c | 44 ++- algo/blake/blake2s-gate.h | 12 +- algo/blake/blake2s.c | 5 +- algo/blake/blakecoin-4way.c | 44 ++- algo/blake/blakecoin-gate.h | 12 +- algo/blake/blakecoin.c | 5 +- algo/blake/decred-4way.c | 22 +- algo/blake/decred-gate.h | 10 +- algo/blake/decred.c | 4 +- algo/blake/pentablake-4way.c | 25 +- algo/blake/pentablake-gate.h | 8 +- algo/blake/pentablake.c | 5 +- algo/bmw/bmw256.c | 5 +- algo/cryptonight/cryptolight.c | 5 +- algo/cryptonight/cryptonight-common.c | 5 +- algo/cryptonight/cryptonight.h | 4 +- algo/groestl/aes_ni/brg_endian.h | 2 +- algo/groestl/groestl.c | 5 +- algo/groestl/myr-groestl.c | 5 +- algo/groestl/myrgr-4way.c | 50 ++-- algo/groestl/myrgr-gate.h | 8 +- algo/heavy/bastion.c | 4 +- algo/heavy/heavy.c | 5 +- algo/hodl/hodl-gate.c | 8 +- algo/hodl/hodl-wolf.c | 5 +- algo/hodl/hodl-wolf.h | 4 +- algo/jh/jha-4way.c | 62 +++-- algo/jh/jha-gate.h | 8 +- algo/jh/jha.c | 6 +- algo/keccak/keccak-4way.c | 31 +-- algo/keccak/keccak-gate.h | 8 +- algo/keccak/keccak.c | 5 +- algo/lyra2/allium-4way.c | 6 +- algo/lyra2/allium.c | 16 +- algo/lyra2/lyra2-gate.h | 28 +- algo/lyra2/lyra2h-4way.c | 6 +- algo/lyra2/lyra2h.c | 19 +- algo/lyra2/lyra2re.c | 20 +- algo/lyra2/lyra2rev2-4way.c | 6 +- algo/lyra2/lyra2rev2.c | 56 ++-- algo/lyra2/lyra2rev3-4way.c | 12 +- algo/lyra2/lyra2rev3.c | 28 +- algo/lyra2/lyra2z-4way.c | 12 +- algo/lyra2/lyra2z.c | 18 +- algo/lyra2/lyra2z330.c | 19 +- algo/lyra2/phi2-4way.c | 6 +- algo/lyra2/phi2.c | 
29 +- algo/m7m.c | 4 +- algo/nist5/nist5-4way.c | 24 +- algo/nist5/nist5-gate.h | 8 +- algo/nist5/nist5.c | 5 +- algo/nist5/zr5.c | 5 +- algo/quark/anime-4way.c | 6 +- algo/quark/anime-gate.h | 4 +- algo/quark/anime.c | 4 +- algo/quark/hmq1725-4way.c | 10 +- algo/quark/hmq1725-gate.h | 4 +- algo/quark/hmq1725.c | 4 +- algo/quark/quark-4way.c | 6 +- algo/quark/quark-gate.h | 4 +- algo/quark/quark.c | 4 +- algo/qubit/deep-2way.c | 12 +- algo/qubit/deep-gate.h | 4 +- algo/qubit/deep.c | 4 +- algo/qubit/qubit-2way.c | 16 +- algo/qubit/qubit-gate.h | 4 +- algo/qubit/qubit.c | 4 +- algo/ripemd/lbry-4way.c | 6 +- algo/ripemd/lbry-gate.h | 6 +- algo/ripemd/lbry.c | 4 +- algo/{neoscrypt => scrypt}/neoscrypt.c | 5 +- algo/{ => scrypt}/pluck.c | 4 +- algo/{ => scrypt}/scrypt.c | 4 +- algo/scryptjane/scrypt-jane.c | 5 +- algo/sha/sha2.c | 17 +- algo/sha/sha256q-4way.c | 12 +- algo/sha/sha256q.c | 19 +- algo/sha/sha256t-4way.c | 22 +- algo/sha/sha256t-gate.h | 12 +- algo/sha/sha256t.c | 19 +- algo/shavite/shavite.c | 5 +- algo/skein/skein-4way.c | 8 +- algo/skein/skein-gate.h | 4 +- algo/skein/skein.c | 4 +- algo/skein/skein2-4way.c | 6 +- algo/skein/skein2-gate.h | 4 +- algo/skein/skein2.c | 4 +- algo/whirlpool/whirlpool-4way.c | 105 ------- algo/whirlpool/whirlpool-gate.h | 8 +- algo/whirlpool/whirlpool.c | 5 +- algo/whirlpool/whirlpoolx.c | 5 +- algo/x11/c11-4way.c | 41 ++- algo/x11/c11-gate.h | 8 +- algo/x11/c11.c | 5 +- algo/x11/fresh.c | 5 +- algo/x11/timetravel-4way.c | 49 ++-- algo/x11/timetravel-gate.h | 8 +- algo/x11/timetravel.c | 6 +- algo/x11/timetravel10-4way.c | 65 +++-- algo/x11/timetravel10-gate.h | 8 +- algo/x11/timetravel10.c | 5 +- algo/x11/tribus-4way.c | 20 +- algo/x11/tribus-gate.h | 8 +- algo/x11/tribus.c | 4 +- algo/x11/x11-4way.c | 41 ++- algo/x11/x11-gate.h | 8 +- algo/x11/x11.c | 5 +- algo/x11/x11evo-4way.c | 69 +++-- algo/x11/x11evo-gate.h | 8 +- algo/x11/x11evo.c | 5 +- algo/x11/x11gost-4way.c | 41 ++- algo/x11/x11gost-gate.h | 8 +- 
algo/x11/x11gost.c | 5 +- algo/x12/x12-4way.c | 68 ++--- algo/x12/x12-gate.h | 8 +- algo/x12/x12.c | 5 +- algo/x13/drop.c | 4 +- algo/x13/phi1612-4way.c | 37 +-- algo/x13/phi1612-gate.h | 8 +- algo/x13/phi1612.c | 5 +- algo/x13/skunk-4way.c | 21 +- algo/x13/skunk-gate.h | 8 +- algo/x13/skunk.c | 5 +- algo/x13/x13-4way.c | 47 ++-- algo/x13/x13-gate.h | 8 +- algo/x13/x13.c | 5 +- algo/x13/x13sm3-4way.c | 51 ++-- algo/x13/x13sm3-gate.h | 8 +- algo/x13/x13sm3.c | 5 +- algo/x14/axiom.c | 4 +- algo/x14/polytimos-4way.c | 6 +- algo/x14/polytimos-gate.h | 4 +- algo/x14/polytimos.c | 4 +- algo/x14/veltor-4way.c | 13 +- algo/x14/veltor-gate.h | 4 +- algo/x14/veltor.c | 4 +- algo/x14/x14-4way.c | 17 +- algo/x14/x14-gate.h | 4 +- algo/x14/x14.c | 4 +- algo/x15/x15-4way.c | 18 +- algo/x15/x15-gate.h | 4 +- algo/x15/x15.c | 8 +- algo/x16/x16r-4way.c | 6 +- algo/x16/x16r-gate.h | 4 +- algo/x16/x16r.c | 13 +- algo/x17/sonoa-4way.c | 6 +- algo/x17/sonoa-gate.h | 4 +- algo/x17/sonoa.c | 65 ++--- algo/x17/x17-4way.c | 6 +- algo/x17/x17-gate.h | 4 +- algo/x17/x17.c | 36 +-- algo/x17/xevan-4way.c | 6 +- algo/x17/xevan-gate.h | 4 +- algo/x17/xevan.c | 17 +- algo/yescrypt/yescrypt.c | 4 +- algo/yespower/yespower.c | 13 +- configure | 20 +- configure.ac | 2 +- cpu-miner.c | 361 +++++++++++++++---------- miner.h | 28 +- simd-utils/intrlv-avx2.h | 10 - simd-utils/intrlv-sse2.h | 3 - 174 files changed, 1352 insertions(+), 1556 deletions(-) rename algo/{neoscrypt => scrypt}/neoscrypt.c (99%) rename algo/{ => scrypt}/pluck.c (99%) rename algo/{ => scrypt}/scrypt.c (99%) delete mode 100644 algo/whirlpool/whirlpool-4way.c diff --git a/Makefile.am b/Makefile.am index 5daee491..141d8295 100644 --- a/Makefile.am +++ b/Makefile.am @@ -134,14 +134,12 @@ cpuminer_SOURCES = \ algo/lyra2/phi2-4way.c \ algo/lyra2/phi2.c \ algo/m7m.c \ - algo/neoscrypt/neoscrypt.c \ algo/nist5/nist5-gate.c \ algo/nist5/nist5-4way.c \ algo/nist5/nist5.c \ algo/nist5/zr5.c \ algo/panama/sph_panama.c \ 
algo/radiogatun/sph_radiogatun.c \ - algo/pluck.c \ algo/quark/quark-gate.c \ algo/quark/quark.c \ algo/quark/quark-4way.c \ @@ -162,7 +160,9 @@ cpuminer_SOURCES = \ algo/ripemd/lbry-gate.c \ algo/ripemd/lbry.c \ algo/ripemd/lbry-4way.c \ - algo/scrypt.c \ + algo/scrypt/scrypt.c \ + algo/scrypt/neoscrypt.c \ + algo/scrypt/pluck.c \ algo/scryptjane/scrypt-jane.c \ algo/sha/sph_sha2.c \ algo/sha/sph_sha2big.c \ @@ -198,7 +198,6 @@ cpuminer_SOURCES = \ algo/whirlpool/sph_whirlpool.c \ algo/whirlpool/whirlpool-hash-4way.c \ algo/whirlpool/whirlpool-gate.c \ - algo/whirlpool/whirlpool-4way.c \ algo/whirlpool/whirlpool.c \ algo/whirlpool/whirlpoolx.c \ algo/x11/x11-gate.c \ diff --git a/README.md b/README.md index 793c0cfa..5d97b24b 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,6 @@ Supported Algorithms blake2s Blake-2 S bmw BMW 256 c11 Chaincoin - cryptolight Cryptonight-light - cryptonight - cryptonightv7 Monero (XMR) decred deep Deepcoin (DCN) dmd-gr Diamond-Groestl @@ -78,9 +75,9 @@ Supported Algorithms luffa Luffa lyra2h Hppcoin lyra2re lyra2 - lyra2rev2 lyra2v2, Vertcoin + lyra2rev2 lyra2v2 lyra2rev3 lyrav2v3, Vertcoin - lyra2z Zcoin (XZC) + lyra2z lyra2z330 Lyra2 330 rows, Zoin (ZOI) m7m Magi (XMG) myr-gr Myriad-Groestl @@ -97,6 +94,7 @@ Supported Algorithms scrypt:N scrypt(N, 1, 1) scryptjane:nf sha256d Double SHA-256 + sha256q Quad SHA-256, Pyrite (PYE) sha256t Triple SHA-256, Onecoin (OC) shavite3 Shavite3 skein Skein+Sha (Skeincoin) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index d84f172f..c430d1a5 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -38,6 +38,14 @@ supported. Change Log ---------- +v3.9.5 + +New share reporting information includes calculation of equivalent hash rate +based on share difficulty, network latency, 5 minute summary. +Per-thread hash rate reports are disabled by default. +New command line option --hash-meter added to enable per-thread hash rates. + + v3.9.4 Faster AVX2 for lyra2v3, quark, anime. 
diff --git a/algo-gate-api.c b/algo-gate-api.c index 0b0ae0e2..dad5c391 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -71,7 +71,6 @@ bool return_false () { return false; } void *return_null () { return NULL; } void call_error () { printf("ERR: Uninitialized function pointer\n"); } - void algo_not_tested() { applog( LOG_WARNING,"Algo %s has not been tested live. It may not work", @@ -149,111 +148,110 @@ void init_algo_gate( algo_gate_t* gate ) // called by each thread that uses the gate bool register_algo_gate( int algo, algo_gate_t *gate ) { - if ( NULL == gate ) - { - applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n"); - return false; - } + if ( NULL == gate ) + { + applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n"); + return false; + } - init_algo_gate( gate ); + init_algo_gate( gate ); - switch (algo) - { - case ALGO_ALLIUM: register_allium_algo ( gate ); break; - case ALGO_ANIME: register_anime_algo ( gate ); break; - case ALGO_ARGON2: register_argon2_algo ( gate ); break; - case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break; - case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break; - case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break; - case ALGO_AXIOM: register_axiom_algo ( gate ); break; - case ALGO_BASTION: register_bastion_algo ( gate ); break; - case ALGO_BLAKE: register_blake_algo ( gate ); break; - case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break; -// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break; - case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break; - case ALGO_C11: register_c11_algo ( gate ); break; - case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break; - case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break; - case ALGO_CRYPTONIGHTV7:register_cryptonightv7_algo( gate ); break; - case ALGO_DECRED: register_decred_algo ( gate ); break; - case ALGO_DEEP: register_deep_algo ( gate ); break; - case ALGO_DMD_GR: 
register_dmd_gr_algo ( gate ); break; - case ALGO_DROP: register_drop_algo ( gate ); break; - case ALGO_FRESH: register_fresh_algo ( gate ); break; - case ALGO_GROESTL: register_groestl_algo ( gate ); break; - case ALGO_HEAVY: register_heavy_algo ( gate ); break; - case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break; - case ALGO_HODL: register_hodl_algo ( gate ); break; - case ALGO_JHA: register_jha_algo ( gate ); break; - case ALGO_KECCAK: register_keccak_algo ( gate ); break; - case ALGO_KECCAKC: register_keccakc_algo ( gate ); break; - case ALGO_LBRY: register_lbry_algo ( gate ); break; - case ALGO_LUFFA: register_luffa_algo ( gate ); break; - case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break; - case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break; - case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break; - case ALGO_LYRA2REV3: register_lyra2rev3_algo ( gate ); break; - case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break; - case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break; - case ALGO_M7M: register_m7m_algo ( gate ); break; - case ALGO_MYR_GR: register_myriad_algo ( gate ); break; - case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break; - case ALGO_NIST5: register_nist5_algo ( gate ); break; - case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break; - case ALGO_PHI1612: register_phi1612_algo ( gate ); break; - case ALGO_PHI2: register_phi2_algo ( gate ); break; - case ALGO_PLUCK: register_pluck_algo ( gate ); break; - case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break; - case ALGO_QUARK: register_quark_algo ( gate ); break; - case ALGO_QUBIT: register_qubit_algo ( gate ); break; - case ALGO_SCRYPT: register_scrypt_algo ( gate ); break; - case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break; - case ALGO_SHA256D: register_sha256d_algo ( gate ); break; - case ALGO_SHA256T: register_sha256t_algo ( gate ); break; - case ALGO_SHA256Q: register_sha256q_algo ( gate ); break; - case ALGO_SHAVITE3: 
register_shavite_algo ( gate ); break; - case ALGO_SKEIN: register_skein_algo ( gate ); break; - case ALGO_SKEIN2: register_skein2_algo ( gate ); break; - case ALGO_SKUNK: register_skunk_algo ( gate ); break; - case ALGO_SONOA: register_sonoa_algo ( gate ); break; - case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break; - case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break; - case ALGO_TRIBUS: register_tribus_algo ( gate ); break; - case ALGO_VANILLA: register_vanilla_algo ( gate ); break; - case ALGO_VELTOR: register_veltor_algo ( gate ); break; - case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break; - case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break; - case ALGO_X11: register_x11_algo ( gate ); break; - case ALGO_X11EVO: register_x11evo_algo ( gate ); break; - case ALGO_X11GOST: register_x11gost_algo ( gate ); break; - case ALGO_X12: register_x12_algo ( gate ); break; - case ALGO_X13: register_x13_algo ( gate ); break; - case ALGO_X13SM3: register_x13sm3_algo ( gate ); break; - case ALGO_X14: register_x14_algo ( gate ); break; - case ALGO_X15: register_x15_algo ( gate ); break; - case ALGO_X16R: register_x16r_algo ( gate ); break; - case ALGO_X16S: register_x16s_algo ( gate ); break; - case ALGO_X17: register_x17_algo ( gate ); break; - case ALGO_XEVAN: register_xevan_algo ( gate ); break; + switch (algo) + { + case ALGO_ALLIUM: register_allium_algo ( gate ); break; + case ALGO_ANIME: register_anime_algo ( gate ); break; + case ALGO_ARGON2: register_argon2_algo ( gate ); break; + case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break; + case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break; + case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break; + case ALGO_AXIOM: register_axiom_algo ( gate ); break; + case ALGO_BASTION: register_bastion_algo ( gate ); break; + case ALGO_BLAKE: register_blake_algo ( gate ); break; + case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break; +// case 
ALGO_BLAKE2B: register_blake2b_algo ( gate ); break; + case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break; + case ALGO_C11: register_c11_algo ( gate ); break; + case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break; + case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break; + case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break; + case ALGO_DECRED: register_decred_algo ( gate ); break; + case ALGO_DEEP: register_deep_algo ( gate ); break; + case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break; + case ALGO_DROP: register_drop_algo ( gate ); break; + case ALGO_FRESH: register_fresh_algo ( gate ); break; + case ALGO_GROESTL: register_groestl_algo ( gate ); break; + case ALGO_HEAVY: register_heavy_algo ( gate ); break; + case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break; + case ALGO_HODL: register_hodl_algo ( gate ); break; + case ALGO_JHA: register_jha_algo ( gate ); break; + case ALGO_KECCAK: register_keccak_algo ( gate ); break; + case ALGO_KECCAKC: register_keccakc_algo ( gate ); break; + case ALGO_LBRY: register_lbry_algo ( gate ); break; + case ALGO_LUFFA: register_luffa_algo ( gate ); break; + case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break; + case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break; + case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break; + case ALGO_LYRA2REV3: register_lyra2rev3_algo ( gate ); break; + case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break; + case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break; + case ALGO_M7M: register_m7m_algo ( gate ); break; + case ALGO_MYR_GR: register_myriad_algo ( gate ); break; + case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break; + case ALGO_NIST5: register_nist5_algo ( gate ); break; + case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break; + case ALGO_PHI1612: register_phi1612_algo ( gate ); break; + case ALGO_PHI2: register_phi2_algo ( gate ); break; + case ALGO_PLUCK: register_pluck_algo ( gate ); break; + 
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break; + case ALGO_QUARK: register_quark_algo ( gate ); break; + case ALGO_QUBIT: register_qubit_algo ( gate ); break; + case ALGO_SCRYPT: register_scrypt_algo ( gate ); break; + case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break; + case ALGO_SHA256D: register_sha256d_algo ( gate ); break; + case ALGO_SHA256Q: register_sha256q_algo ( gate ); break; + case ALGO_SHA256T: register_sha256t_algo ( gate ); break; + case ALGO_SHAVITE3: register_shavite_algo ( gate ); break; + case ALGO_SKEIN: register_skein_algo ( gate ); break; + case ALGO_SKEIN2: register_skein2_algo ( gate ); break; + case ALGO_SKUNK: register_skunk_algo ( gate ); break; + case ALGO_SONOA: register_sonoa_algo ( gate ); break; + case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break; + case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break; + case ALGO_TRIBUS: register_tribus_algo ( gate ); break; + case ALGO_VANILLA: register_vanilla_algo ( gate ); break; + case ALGO_VELTOR: register_veltor_algo ( gate ); break; + case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break; + case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break; + case ALGO_X11: register_x11_algo ( gate ); break; + case ALGO_X11EVO: register_x11evo_algo ( gate ); break; + case ALGO_X11GOST: register_x11gost_algo ( gate ); break; + case ALGO_X12: register_x12_algo ( gate ); break; + case ALGO_X13: register_x13_algo ( gate ); break; + case ALGO_X13SM3: register_x13sm3_algo ( gate ); break; + case ALGO_X14: register_x14_algo ( gate ); break; + case ALGO_X15: register_x15_algo ( gate ); break; + case ALGO_X16R: register_x16r_algo ( gate ); break; + case ALGO_X16S: register_x16s_algo ( gate ); break; + case ALGO_X17: register_x17_algo ( gate ); break; + case ALGO_XEVAN: register_xevan_algo ( gate ); break; /* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break; case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break; case 
ALGO_YESCRYPTR16: register_yescryptr16_05_algo ( gate ); break; case ALGO_YESCRYPTR32: register_yescryptr32_05_algo ( gate ); break; */ - case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break; - case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break; - case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break; - case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break; - - case ALGO_YESPOWER: register_yespower_algo ( gate ); break; - case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break; - case ALGO_ZR5: register_zr5_algo ( gate ); break; - default: - applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] ); - return false; - } // switch + case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break; + case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break; + case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break; + case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break; + case ALGO_YESPOWER: register_yespower_algo ( gate ); break; + case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break; + case ALGO_ZR5: register_zr5_algo ( gate ); break; + default: + applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] ); + return false; + } // switch - // ensure required functions were defined. + // ensure required functions were defined. if ( gate->scanhash == (void*)&null_scanhash ) { applog(LOG_ERR, "FAIL: Required algo_gate functions undefined\n"); @@ -363,16 +361,16 @@ void get_algo_alias( char** algo_or_alias ) #undef ALIAS #undef PROPER -// only for parallel when there are lanes. 
bool submit_solution( struct work *work, void *hash, - struct thr_info *thr, int lane ) + struct thr_info *thr ) { work_set_target_ratio( work, hash ); if ( submit_work( thr, work ) ) { - applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.", - accepted_share_count + rejected_share_count + 1, - thr->id, lane ); + if ( !opt_quiet ) + applog( LOG_NOTICE, "Share %d submitted by thread %d.", + accepted_share_count + rejected_share_count + 1, + thr->id ); return true; } else @@ -380,4 +378,20 @@ bool submit_solution( struct work *work, void *hash, return false; } +bool submit_lane_solution( struct work *work, void *hash, + struct thr_info *thr, int lane ) +{ + work_set_target_ratio( work, hash ); + if ( submit_work( thr, work ) ) + { + if ( !opt_quiet ) + applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.", + accepted_share_count + rejected_share_count + 1, thr->id, + lane ); + return true; + } + else + applog( LOG_WARNING, "Failed to submit share." ); + return false; +} diff --git a/algo-gate-api.h b/algo-gate-api.h index 5b84c552..e2a26b36 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -116,7 +116,7 @@ typedef struct // Added a 5th arg for the thread_info structure to replace the int thr id // in the first arg. Both will co-exist during the trasition. //int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* ); -int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t*, struct thr_info* ); +int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* ); // optional unsafe, must be overwritten if algo uses function void ( *hash ) ( void*, const void*, uint32_t ) ; @@ -153,7 +153,6 @@ int ntime_index; int nbits_index; int nonce_index; // use with caution, see warning below int work_cmp_size; - } algo_gate_t; extern algo_gate_t algo_gate; @@ -194,9 +193,12 @@ void four_way_not_tested(); // allways returns failure int null_scanhash(); -// The one and only, a callback for scanhash. +// Allow algos to submit from scanhash loop. 
bool submit_solution( struct work *work, void *hash, - struct thr_info *thr, int lane ); + struct thr_info *thr ); +bool submit_lane_solution( struct work *work, void *hash, + struct thr_info *thr, int lane ); + bool submit_work( struct thr_info *thr, const struct work *work_in ); diff --git a/algo/argon2/argon2a/argon2a.c b/algo/argon2/argon2a/argon2a.c index b9845c22..21bb2a00 100644 --- a/algo/argon2/argon2a/argon2a.c +++ b/algo/argon2/argon2a/argon2a.c @@ -42,12 +42,14 @@ void argon2hash(void *output, const void *input) (unsigned char *)output); } -int scanhash_argon2(int thr_id, struct work* work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_argon2( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; diff --git a/algo/argon2/argon2d/argon2d-gate.c b/algo/argon2/argon2d/argon2d-gate.c index a4b829b9..32384476 100644 --- a/algo/argon2/argon2d/argon2d-gate.c +++ b/algo/argon2/argon2d/argon2d-gate.c @@ -33,13 +33,14 @@ void argon2d_crds_hash( void *output, const void *input ) argon2_ctx( &context, Argon2_d ); } -int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; @@ -103,13 +104,14 @@ void argon2d_dyn_hash( void *output, const void *input ) argon2_ctx( &context, Argon2_d ); } -int scanhash_argon2d_dyn( int thr_id, struct work *work, 
uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; @@ -147,8 +149,8 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate ) // Unitus -int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) vhash[8]; uint32_t _ALIGN(64) endiandata[20]; @@ -157,7 +159,7 @@ int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t t_cost = 1; // 1 iteration uint32_t m_cost = 4096; // use 4MB uint32_t parallelism = 1; // 1 thread, 2 lanes diff --git a/algo/argon2/argon2d/argon2d-gate.h b/algo/argon2/argon2d/argon2d-gate.h index d045858f..dbb2b4da 100644 --- a/algo/argon2/argon2d/argon2d-gate.h +++ b/algo/argon2/argon2d/argon2d-gate.h @@ -9,23 +9,23 @@ bool register_argon2d_crds_algo( algo_gate_t* gate ); void argon2d_crds_hash( void *state, const void *input ); -int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); // Dynamic: version = 0x10, m_cost = 500. 
bool register_argon2d_dyn_algo( algo_gate_t* gate ); void argon2d_dyn_hash( void *state, const void *input ); -int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); // Unitus: version = 0x13, m_cost = 4096. bool register_argon2d4096_algo( algo_gate_t* gate ); -int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/blake-4way.c b/algo/blake/blake-4way.c index ca397696..a1b6d20f 100644 --- a/algo/blake/blake-4way.c +++ b/algo/blake/blake-4way.c @@ -15,11 +15,11 @@ void blakehash_4way(void *state, const void *input) memcpy( &ctx, &blake_4w_ctx, sizeof ctx ); blake256r14_4way( &ctx, input + (64<<2), 16 ); blake256r14_4way_close( &ctx, vhash ); - mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 ); + mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 ); } -int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (32))); @@ -29,15 +29,14 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t HTarget = ptarget[7]; uint32_t _ALIGN(32) edata[20]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) HTarget = 0x7f; // we need big endian data... 
swab32_array( edata, pdata, 20 ); - mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 ); + mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 ); blake256r14_4way_init( &blake_4w_ctx ); blake256r14_4way( &blake_4w_ctx, vdata, 64 ); @@ -51,19 +50,17 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce, blakehash_4way( hash, vdata ); for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= HTarget ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); - + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif @@ -79,13 +76,13 @@ void blakehash_8way( void *state, const void *input ) memcpy( &ctx, &blake_8w_ctx, sizeof ctx ); blake256r14_8way( &ctx, input + (64<<3), 16 ); blake256r14_8way_close( &ctx, vhash ); - mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96, + mm256_dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128, state+160, state+192, state+224, vhash, 256 ); } -int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t hash[8*8] __attribute__ ((aligned (32))); @@ -95,8 +92,7 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t HTarget = ptarget[7]; uint32_t _ALIGN(32) edata[20]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) 
HTarget = 0x7f; @@ -104,7 +100,7 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce, // we need big endian data... swab32_array( edata, pdata, 20 ); - mm256_interleave_8x32( vdata, edata, edata, edata, edata, + mm256_intrlv_8x32( vdata, edata, edata, edata, edata, edata, edata, edata, edata, 640 ); blake256r14_8way_init( &blake_8w_ctx ); @@ -128,17 +124,14 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce, if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) ) { pdata[19] = n+i; - num_found++; - nonces[i] = n+i; - work_set_target_ratio( work, hash+1 ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 8; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/blake/blake-gate.h b/algo/blake/blake-gate.h index ec457961..b8a400b6 100644 --- a/algo/blake/blake-gate.h +++ b/algo/blake/blake-gate.h @@ -10,12 +10,12 @@ #if defined (BLAKE_4WAY) void blakehash_4way(void *state, const void *input); -int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blake_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif void blakehash( void *state, const void *input ); -int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blake( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/blake.c b/algo/blake/blake.c index d9e9078a..385822ee 100644 --- a/algo/blake/blake.c +++ b/algo/blake/blake.c @@ -39,8 +39,8 @@ void blakehash(void *state, const void *input) } -int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake( struct work *work, 
uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -49,6 +49,7 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce, uint32_t _ALIGN(32) hash64[8]; uint32_t _ALIGN(32) endiandata[20]; uint32_t n = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated ctx_midstate_done = false; diff --git a/algo/blake/blake2b.c b/algo/blake/blake2b.c index 745cfc7e..6799116a 100644 --- a/algo/blake/blake2b.c +++ b/algo/blake/blake2b.c @@ -35,13 +35,14 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input) } */ -int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake2b( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(A) vhashcpu[8]; uint32_t _ALIGN(A) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[8]; diff --git a/algo/blake/blake2s-4way.c b/algo/blake/blake2s-4way.c index a5dc4c41..f4cd130c 100644 --- a/algo/blake/blake2s-4way.c +++ b/algo/blake/blake2s-4way.c @@ -16,13 +16,13 @@ void blake2s_8way_hash( void *output, const void *input ) blake2s_8way_update( &ctx, input + (64<<3), 16 ); blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES ); - mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96, + mm256_dintrlv_8x32( output, output+ 32, output+ 64, output+ 96, output+128, output+160, output+192, output+224, vhash, 256 ); } -int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t hash[8*8] __attribute__ ((aligned (32))); @@ -32,12 +32,11 
@@ int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 152; // 19*8 + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( edata, pdata, 20 ); - mm256_interleave_8x32( vdata, edata, edata, edata, edata, + mm256_intrlv_8x32( vdata, edata, edata, edata, edata, edata, edata, edata, edata, 640 ); blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES ); blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 ); @@ -57,19 +56,18 @@ int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce, for ( int i = 0; i < 8; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 8; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #elif defined(BLAKE2S_4WAY) @@ -85,12 +83,12 @@ void blake2s_4way_hash( void *output, const void *input ) blake2s_4way_update( &ctx, input + (64<<2), 16 ); blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES ); - mm128_deinterleave_4x32( output, output+32, output+64, output+96, + mm128_dintrlv_4x32( output, output+32, output+64, output+96, vhash, 256 ); } -int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (32))); @@ -100,12 
+98,11 @@ int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 76; // 19*4 + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( edata, pdata, 20 ); - mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 ); + mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 ); blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES ); blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 ); @@ -119,19 +116,18 @@ int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce, blake2s_4way_hash( hash, vdata ); for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/blake/blake2s-gate.h b/algo/blake/blake2s-gate.h index 18a9c5bc..ee1a2435 100644 --- a/algo/blake/blake2s-gate.h +++ b/algo/blake/blake2s-gate.h @@ -16,19 +16,19 @@ bool register_blake2s_algo( algo_gate_t* gate ); #if defined(BLAKE2S_8WAY) void blake2s_8way_hash( void *state, const void *input ); -int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #elif defined (BLAKE2S_4WAY) void blake2s_4way_hash( void *state, const void *input ); -int scanhash_blake2s_4way( int thr_id, 
struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #else void blake2s_hash( void *state, const void *input ); -int scanhash_blake2s( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blake2s( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/blake2s.c b/algo/blake/blake2s.c index 3212913c..aee4ce50 100644 --- a/algo/blake/blake2s.c +++ b/algo/blake/blake2s.c @@ -32,14 +32,15 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input) blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES); } */ -int scanhash_blake2s(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_blake2s( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t _ALIGN(64) hash64[8]; uint32_t _ALIGN(64) endiandata[20]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; diff --git a/algo/blake/blakecoin-4way.c b/algo/blake/blakecoin-4way.c index 95a9a2c5..b0f2c6de 100644 --- a/algo/blake/blakecoin-4way.c +++ b/algo/blake/blakecoin-4way.c @@ -17,11 +17,11 @@ void blakecoin_4way_hash(void *state, const void *input) blake256r8_4way( &ctx, input + (64<<2), 16 ); blake256r8_4way_close( &ctx, vhash ); - mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 ); + mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 ); } -int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*4] __attribute__ ((aligned (64))); 
uint32_t hash[8*4] __attribute__ ((aligned (32))); @@ -31,13 +31,12 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t HTarget = ptarget[7]; uint32_t _ALIGN(32) edata[20]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) HTarget = 0x7f; swab32_array( edata, pdata, 20 ); - mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 ); + mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 ); blake256r8_4way_init( &blakecoin_4w_ctx ); blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 ); @@ -51,19 +50,18 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce, blakecoin_4way_hash( hash, vdata ); for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) + && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif @@ -81,13 +79,13 @@ void blakecoin_8way_hash( void *state, const void *input ) blake256r8_8way( &ctx, input + (64<<3), 16 ); blake256r8_8way_close( &ctx, vhash ); - mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96, + mm256_dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128, state+160, state+192, state+224, vhash, 256 ); } -int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*8] __attribute__ ((aligned (64))); 
uint32_t hash[8*8] __attribute__ ((aligned (32))); @@ -97,15 +95,14 @@ int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t HTarget = ptarget[7]; uint32_t _ALIGN(32) edata[20]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; uint32_t *noncep = vdata + 152; // 19*8 - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) HTarget = 0x7f; // we need big endian data... swab32_array( edata, pdata, 20 ); - mm256_interleave_8x32( vdata, edata, edata, edata, edata, + mm256_intrlv_8x32( vdata, edata, edata, edata, edata, edata, edata, edata, edata, 640 ); blake256r8_8way_init( &blakecoin_8w_ctx ); blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 ); @@ -123,18 +120,17 @@ int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce, blakecoin_8way_hash( hash, vdata ); for ( int i = 0; i < 8; i++ ) - if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) + && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 8; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/blake/blakecoin-gate.h b/algo/blake/blakecoin-gate.h index 456d349b..456aa90e 100644 --- a/algo/blake/blakecoin-gate.h +++ b/algo/blake/blakecoin-gate.h @@ -13,18 +13,18 @@ #if defined (BLAKECOIN_8WAY) void blakecoin_8way_hash(void *state, const void *input); -int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif #if defined (BLAKECOIN_4WAY) 
void blakecoin_4way_hash(void *state, const void *input); -int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif void blakecoinhash( void *state, const void *input ); -int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_blakecoin( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/blakecoin.c b/algo/blake/blakecoin.c index 4eda458b..f733c2c5 100644 --- a/algo/blake/blakecoin.c +++ b/algo/blake/blakecoin.c @@ -39,13 +39,14 @@ void blakecoinhash( void *state, const void *input ) memcpy( state, hash, 32 ); } -int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_blakecoin( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; uint32_t HTarget = ptarget[7]; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t _ALIGN(32) hash64[8]; uint32_t _ALIGN(32) endiandata[20]; diff --git a/algo/blake/decred-4way.c b/algo/blake/decred-4way.c index 59e93bb4..aa5f792f 100644 --- a/algo/blake/decred-4way.c +++ b/algo/blake/decred-4way.c @@ -23,11 +23,11 @@ void decred_hash_4way( void *state, const void *input ) memcpy( &ctx, &blake_mid, sizeof(blake_mid) ); blake256_4way( &ctx, tail, tail_len ); blake256_4way_close( &ctx, vhash ); - mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 ); + mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 ); } -int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_decred_4way( struct work *work, uint32_t max_nonce, + uint64_t 
*hashes_done, struct thr_info *mythr ) { uint32_t vdata[48*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (32))); @@ -37,14 +37,13 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX]; uint32_t n = first_nonce; const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7]; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated // copy to buffer guaranteed to be aligned. memcpy( edata, pdata, 180 ); // use the old way until new way updated for size. - mm128_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 ); + mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 ); blake256_4way_init( &blake_mid ); blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN ); @@ -59,18 +58,17 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce, decred_hash_4way( hash, vdata ); for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= HTarget ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[DECRED_NONCE_INDEX] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( (num_found == 0) && (n < max_nonce) - && !work_restart[thr_id].restart ); + } while ( (n < max_nonce) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/blake/decred-gate.h b/algo/blake/decred-gate.h index 945878e9..3910b504 100644 --- a/algo/blake/decred-gate.h +++ b/algo/blake/decred-gate.h @@ -14,7 +14,7 @@ #if defined (__AVX2__) //void blakehash_84way(void *state, const void *input); -//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce, +//int scanhash_blake_8way( struct work *work, uint32_t max_nonce, // uint64_t 
*hashes_done ); #endif @@ -24,13 +24,13 @@ #if defined (DECRED_4WAY) void decred_hash_4way(void *state, const void *input); -int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_decred_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif void decred_hash( void *state, const void *input ); -int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_decred( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/decred.c b/algo/blake/decred.c index 80890b28..8645d2a9 100644 --- a/algo/blake/decred.c +++ b/algo/blake/decred.c @@ -52,12 +52,14 @@ void decred_hash_simple(void *state, const void *input) sph_blake256_close(&ctx, state); } -int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_decred( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) endiandata[48]; uint32_t _ALIGN(64) hash32[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated // #define DCR_NONCE_OFT32 35 diff --git a/algo/blake/pentablake-4way.c b/algo/blake/pentablake-4way.c index b6416803..7033d4a0 100644 --- a/algo/blake/pentablake-4way.c +++ b/algo/blake/pentablake-4way.c @@ -14,7 +14,7 @@ extern void pentablakehash_4way( void *output, const void *input ) { - unsigned char _ALIGN(32) hash[128]; +// unsigned char _ALIGN(32) hash[128]; // // same as uint32_t hashA[16], hashB[16]; // #define hashB hash+64 @@ -29,7 +29,7 @@ extern void pentablakehash_4way( void *output, const void *input ) blake512_4way_init( &ctx ); blake512_4way( &ctx, input, 80 ); blake512_4way_close( &ctx, vhash ); - +/* uint64_t sin0[10], sin1[10], sin2[10], sin3[10]; mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, 
input, 640 ); sph_blake512_context ctx2_blake; @@ -37,14 +37,14 @@ sph_blake512_init(&ctx2_blake); sph_blake512(&ctx2_blake, sin0, 80); sph_blake512_close(&ctx2_blake, (void*) hash); -mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); +mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); uint64_t* hash64 = (uint64_t*)hash; for( int i = 0; i < 8; i++ ) { if ( hash0[i] != hash64[i] ) printf("hash mismatch %u\n",i); } - +*/ blake512_4way_init( &ctx ); blake512_4way( &ctx, vhash, 64 ); blake512_4way_close( &ctx, vhash ); @@ -61,7 +61,7 @@ for( int i = 0; i < 8; i++ ) blake512_4way( &ctx, vhash, 64 ); blake512_4way_close( &ctx, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); memcpy( output, hash0, 32 ); memcpy( output+32, hash1, 32 ); memcpy( output+64, hash2, 32 ); @@ -99,8 +99,8 @@ for( int i = 0; i < 8; i++ ) */ } -int scanhash_pentablake_4way( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ) +int scanhash_pentablake_4way( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); @@ -110,9 +110,8 @@ int scanhash_pentablake_4way( int thr_id, struct work *work, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated // uint32_t _ALIGN(32) hash64[8]; // uint32_t _ALIGN(32) endiandata[32]; @@ -138,7 +137,7 @@ int scanhash_pentablake_4way( int thr_id, struct work *work, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); 
for ( int m=0; m < 6; m++ ) { @@ -155,10 +154,10 @@ int scanhash_pentablake_4way( int thr_id, struct work *work, for ( int i = 0; i < 4; i++ ) if ( !( (hash+(i<<3))[7] & mask ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + pdata[19] = n + i; + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; diff --git a/algo/blake/pentablake-gate.h b/algo/blake/pentablake-gate.h index 04aa2b48..0d2d995a 100644 --- a/algo/blake/pentablake-gate.h +++ b/algo/blake/pentablake-gate.h @@ -10,12 +10,12 @@ #if defined(PENTABLAKE_4WAY) void pentablakehash_4way( void *state, const void *input ); -int scanhash_pentablake_4way( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ); +int scanhash_pentablake_4way( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif void pentablakehash( void *state, const void *input ); -int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_pentablake( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/blake/pentablake.c b/algo/blake/pentablake.c index d1686470..55c874c5 100644 --- a/algo/blake/pentablake.c +++ b/algo/blake/pentablake.c @@ -40,8 +40,8 @@ extern void pentablakehash(void *output, const void *input) } -int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_pentablake( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -49,6 +49,7 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; + int thr_id = mythr->id; // thr_id arg is 
deprecated uint32_t _ALIGN(32) hash64[8]; uint32_t _ALIGN(32) endiandata[32]; diff --git a/algo/bmw/bmw256.c b/algo/bmw/bmw256.c index 379f632b..39352a7f 100644 --- a/algo/bmw/bmw256.c +++ b/algo/bmw/bmw256.c @@ -19,14 +19,15 @@ void bmwhash(void *output, const void *input) */ } -int scanhash_bmw(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_bmw( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t _ALIGN(64) hash64[8]; uint32_t _ALIGN(64) endiandata[20]; + int thr_id = mythr->id; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; diff --git a/algo/cryptonight/cryptolight.c b/algo/cryptonight/cryptolight.c index b3e3a661..18e29401 100644 --- a/algo/cryptonight/cryptolight.c +++ b/algo/cryptonight/cryptolight.c @@ -312,8 +312,8 @@ static void cryptolight_hash_ctx_aes_ni(void* output, const void* input, oaes_free((OAES_CTX **) &ctx->aes_ctx); } -int scanhash_cryptolight(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_cryptolight( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -322,6 +322,7 @@ int scanhash_cryptolight(int thr_id, struct work *work, const uint32_t first_nonce = n + 1; //const uint32_t Htarg = ptarget[7]; uint32_t _ALIGN(32) hash[HASH_SIZE / 4]; + int thr_id = mythr->id; struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); diff --git a/algo/cryptonight/cryptonight-common.c b/algo/cryptonight/cryptonight-common.c index 671704c8..e6c346b6 100644 --- a/algo/cryptonight/cryptonight-common.c +++ b/algo/cryptonight/cryptonight-common.c @@ -70,11 +70,12 @@ void cryptonight_hash_suw( void *restrict output, const void *input ) bool cryptonightV7 = false; -int scanhash_cryptonight( int thr_id, 
struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_cryptonight( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39); uint32_t n = *nonceptr - 1; diff --git a/algo/cryptonight/cryptonight.h b/algo/cryptonight/cryptonight.h index b420c320..43016d00 100644 --- a/algo/cryptonight/cryptonight.h +++ b/algo/cryptonight/cryptonight.h @@ -40,8 +40,8 @@ void cryptonight_hash_ctx(void* output, const void* input, int len); void keccakf(uint64_t st[25], int rounds); extern void (* const extra_hashes[4])(const void *, size_t, char *); -int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_cryptonight( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void cryptonight_hash_aes( void *restrict output, const void *input, int len ); diff --git a/algo/groestl/aes_ni/brg_endian.h b/algo/groestl/aes_ni/brg_endian.h index e3cf0d11..b8835887 100644 --- a/algo/groestl/aes_ni/brg_endian.h +++ b/algo/groestl/aes_ni/brg_endian.h @@ -43,7 +43,7 @@ # if !defined( __MINGW32__ ) && !defined( _AIX ) # include # if !defined( __BEOS__ ) -# include +//# include # endif # endif #endif diff --git a/algo/groestl/groestl.c b/algo/groestl/groestl.c index 9e6785c0..571c4c0a 100644 --- a/algo/groestl/groestl.c +++ b/algo/groestl/groestl.c @@ -56,14 +56,15 @@ void groestlhash( void *output, const void *input ) memcpy(output, hash, 32); } -int scanhash_groestl( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_groestl( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t endiandata[20] __attribute__ ((aligned (64))); const uint32_t first_nonce = 
pdata[19]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/algo/groestl/myr-groestl.c b/algo/groestl/myr-groestl.c index aa785646..d66260ad 100644 --- a/algo/groestl/myr-groestl.c +++ b/algo/groestl/myr-groestl.c @@ -54,8 +54,8 @@ void myriad_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_myriad(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_myriad( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -63,6 +63,7 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t _ALIGN(64) endiandata[20]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/algo/groestl/myrgr-4way.c b/algo/groestl/myrgr-4way.c index c918e057..6422e2e7 100644 --- a/algo/groestl/myrgr-4way.c +++ b/algo/groestl/myrgr-4way.c @@ -33,7 +33,7 @@ void myriad_4way_hash( void *output, const void *input ) myrgr_4way_ctx_holder ctx; memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) ); - mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 ); + mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, input, 640 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 ); memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) ); @@ -43,29 +43,30 @@ void myriad_4way_hash( void *output, const void *input ) memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 ); - mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); + mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); sha256_4way( &ctx.sha, vhash, 64 ); - sha256_4way_close( 
&ctx.sha, vhash ); + sha256_4way_close( &ctx.sha, output ); - mm128_deinterleave_4x32( output, output+32, output+64, output+96, - vhash, 256 ); +// sha256_4way_close( &ctx.sha, vhash ); +// mm128_dintrlv_4x32( output, output+32, output+64, output+96, +// vhash, 256 ); } -int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_myriad_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t _ALIGN(64) edata[20]; + uint32_t lane_hash[8] __attribute__ ((aligned (64))); + uint32_t *hash7 = &(hash[7<<2]); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; - uint32_t *noncep = vdata + 76; // 19*4 + __m128i *noncev = (__m128i*)vdata + 19; // aligned + int thr_id = mythr->id; // thr_id arg is deprecated /* uint32_t *pdata = work->data; @@ -78,31 +79,28 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; - swab32_array( edata, pdata, 20 ); - mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 ); - + mm128_bswap_intrlv80_4x32( vdata, pdata ); do { - be32enc( noncep, n ); - be32enc( noncep+1, n+1 ); - be32enc( noncep+2, n+2 ); - be32enc( noncep+3, n+3 ); + *noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) ); myriad_4way_hash( hash, vdata ); pdata[19] = n; - for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + for ( int lane = 0; lane < 4; lane++ ) + if ( hash7[ lane ] <= Htarg ) { - pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + mm128_extract_lane_4x32( lane_hash, hash, lane, 256 ); + if ( fulltest( 
lane_hash, ptarget ) && !opt_benchmark ) + { + pdata[19] = n + lane; + submit_lane_solution( work, lane_hash, mythr, lane ); + } } n += 4; - } while ( (num_found == 0) && (n < max_nonce-4) - && !work_restart[thr_id].restart); + } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/groestl/myrgr-gate.h b/algo/groestl/myrgr-gate.h index 5b391067..89fc5f16 100644 --- a/algo/groestl/myrgr-gate.h +++ b/algo/groestl/myrgr-gate.h @@ -12,8 +12,8 @@ void myriad_4way_hash( void *state, const void *input ); -int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_myriad_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_myrgr_4way_ctx(); @@ -21,8 +21,8 @@ void init_myrgr_4way_ctx(); void myriad_hash( void *state, const void *input ); -int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_myriad( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_myrgr_ctx(); diff --git a/algo/heavy/bastion.c b/algo/heavy/bastion.c index fd12b2ee..afbbdab8 100644 --- a/algo/heavy/bastion.c +++ b/algo/heavy/bastion.c @@ -131,12 +131,14 @@ void bastionhash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_bastion(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_bastion( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) { uint32_t _ALIGN(64) hash32[8]; uint32_t _ALIGN(64) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; diff --git a/algo/heavy/heavy.c b/algo/heavy/heavy.c index 88e0bdf5..68e5bc78 100644 --- 
a/algo/heavy/heavy.c +++ b/algo/heavy/heavy.c @@ -79,11 +79,12 @@ extern void heavyhash(unsigned char* output, const unsigned char* input, int len } -int scanhash_heavy(int thr_id, uint32_t *pdata, const uint32_t *ptarget, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[8]; uint32_t start_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated do { heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80); diff --git a/algo/hodl/hodl-gate.c b/algo/hodl/hodl-gate.c index 2961e2a0..07fd0fa9 100644 --- a/algo/hodl/hodl-gate.c +++ b/algo/hodl/hodl-gate.c @@ -143,13 +143,13 @@ bool hodl_do_this_thread( int thr_id ) return ( thr_id == 0 ); } -int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ) +int hodl_scanhash( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { #if defined(__AES__) - GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id ); + GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id ); pthread_barrier_wait( &hodl_barrier ); - return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done ); + return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info ); #endif return false; } diff --git a/algo/hodl/hodl-wolf.c b/algo/hodl/hodl-wolf.c index 5747e7ea..f8a15c0e 100644 --- a/algo/hodl/hodl-wolf.c +++ b/algo/hodl/hodl-wolf.c @@ -61,13 +61,14 @@ void Rev256(uint32_t *Dest, const uint32_t *Src) } */ -int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { #ifdef __SSE4_2__ //#ifdef __AVX__ uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int threadNumber = mythr->id; CacheEntry *Garbage = 
(CacheEntry*)hodl_scratchbuf; CacheEntry Cache[AES_PARALLEL_N]; __m128i* data[AES_PARALLEL_N]; diff --git a/algo/hodl/hodl-wolf.h b/algo/hodl/hodl-wolf.h index 19ec8914..47c8fb87 100644 --- a/algo/hodl/hodl-wolf.h +++ b/algo/hodl/hodl-wolf.h @@ -19,8 +19,8 @@ typedef union _CacheEntry __m128i dqwords[GARBAGE_SLICE_SIZE >> 4] __attribute__((aligned(16))); } CacheEntry; -int scanhash_hodl_wolf( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void GenRandomGarbage( CacheEntry *Garbage, uint32_t *pdata, int thr_id); diff --git a/algo/jh/jha-4way.c b/algo/jh/jha-4way.c index 35df1fbd..ede4b832 100644 --- a/algo/jh/jha-4way.c +++ b/algo/jh/jha-4way.c @@ -46,7 +46,7 @@ void jha_hash_4way( void *out, const void *input ) vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], _mm256_set1_epi64x( 1 ) ), m256_zero ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); init_groestl( &ctx_groestl, 64 ); update_and_final_groestl( &ctx_groestl, (char*)hash0, (char*)hash0, 512 ); @@ -59,7 +59,7 @@ void jha_hash_4way( void *out, const void *input ) init_groestl( &ctx_groestl, 64 ); update_and_final_groestl( &ctx_groestl, (char*)hash3, (char*)hash3, 512 ); - mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 ); skein512_4way_init( &ctx_skein ); skein512_4way( &ctx_skein, vhash, 64 ); @@ -77,26 +77,26 @@ void jha_hash_4way( void *out, const void *input ) jh512_4way_close( &ctx_jh, vhashB ); for ( int i = 0; i < 8; i++ ) - vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask ); + casti_m256i( out, i ) = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask ); } - mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 ); +// mm256_dintrlv_4x64( out, out+32, out+64, out+96, vhash, 256 ); } 
-int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_jha_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t endiandata[20] __attribute__((aligned(64))); + uint32_t *hash7 = &(hash[25]); + uint32_t lane_hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; uint32_t n = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; - uint32_t *noncep = vdata + 73; // 9*8 + 1 + __m256i *noncev = (__m256i*)vdata + 9; // aligned + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, @@ -115,11 +115,12 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce, 0 }; - for ( int i=0; i < 19; i++ ) - be32enc( &endiandata[i], pdata[i] ); +// for ( int i=0; i < 19; i++ ) +// be32enc( &endiandata[i], pdata[i] ); - uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); +// uint64_t *edata = (uint64_t*)endiandata; +// mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_bswap_intrlv80_4x64( vdata, pdata ); for ( int m = 0; m < 6; m++ ) { @@ -127,29 +128,36 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce, { uint32_t mask = masks[m]; do { - be32enc( noncep, n ); - be32enc( noncep+2, n+1 ); - be32enc( noncep+4, n+2 ); - be32enc( noncep+6, n+3 ); + *noncev = mm256_intrlv_blend_32( mm256_bswap_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); +// be32enc( noncep, n ); +// be32enc( noncep+2, n+1 ); +// be32enc( noncep+4, n+2 ); +// be32enc( noncep+6, n+3 ); jha_hash_4way( hash, vdata ); pdata[19] = n; - for ( int i = 0; i < 4; i++ ) - if ( ( !( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( 
hash+(i<<3), ptarget ) ) +// for ( int i = 0; i < 4; i++ ) +// if ( ( !( (hash+(i<<3))[7] & mask ) == 0 ) +// && fulltest( hash+(i<<3), ptarget ) ) + for ( int i = 0; i < 4; i++ ) if ( !( (hash7[i<<1] & mask ) == 0 ) ) { - pdata[19] = n; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + mm256_extract_lane_4x64( lane_hash, hash, i, 256 ); + if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) + { + pdata[19] = n+i; + submit_lane_solution( work, lane_hash, mythr, i ); +// nonces[ num_found++ ] = n+i; +// work_set_target_ratio( work, hash+(i<<3) ); + } } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/jh/jha-gate.h b/algo/jh/jha-gate.h index f772d8dd..8a0ddadc 100644 --- a/algo/jh/jha-gate.h +++ b/algo/jh/jha-gate.h @@ -12,14 +12,14 @@ #if defined JHA_4WAY void jha_hash_4way( void *state, const void *input ); -int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_jha_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif void jha_hash( void *state, const void *input ); -int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_jha( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/jh/jha.c b/algo/jh/jha.c index d5ded994..42767e95 100644 --- a/algo/jh/jha.c +++ b/algo/jh/jha.c @@ -81,7 +81,8 @@ void jha_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_jha( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; 
uint32_t _ALIGN(128) endiandata[20]; @@ -89,7 +90,8 @@ int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - uint32_t n = pdata[19] - 1; + uint32_t n = pdata[19] - 1; + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, diff --git a/algo/keccak/keccak-4way.c b/algo/keccak/keccak-4way.c index dfe4a4f8..d5e1d29a 100644 --- a/algo/keccak/keccak-4way.c +++ b/algo/keccak/keccak-4way.c @@ -16,8 +16,8 @@ void keccakhash_4way(void *state, const void *input) keccak256_4way_close( &ctx, state ); } -int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_keccak_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (32))); @@ -27,23 +27,14 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; + __m256i *noncev = (__m256i*)vdata + 9; // aligned // const uint32_t Htarg = ptarget[7]; - uint32_t endiandata[20]; - uint32_t *nonces = work->nonces; - int num_found = 0; - uint32_t *noncep = vdata + 73; // 9*8 + 1 - - for ( int i=0; i < 19; i++ ) - be32enc( &endiandata[i], pdata[i] ); - - uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + int thr_id = mythr->id; // thr_id arg is deprecated + mm256_bswap_intrlv80_4x64( vdata, pdata ); do { - be32enc( noncep, n ); - be32enc( noncep+2, n+1 ); - be32enc( noncep+4, n+2 ); - be32enc( noncep+6, n+3 ); + *noncev = mm256_intrlv_blend_32( mm256_bswap_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); keccakhash_4way( hash, vdata ); @@ -54,17 +45,15 @@ int scanhash_keccak_4way( int 
thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) ) { pdata[19] = n + lane; - nonces[ num_found++ ] = n + lane; - work_set_target_ratio( work, lane_hash ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; - } while ( (num_found == 0) && (n < max_nonce-4) - && !work_restart[thr_id].restart); + } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/keccak/keccak-gate.h b/algo/keccak/keccak-gate.h index bdc4164a..e9fc5e79 100644 --- a/algo/keccak/keccak-gate.h +++ b/algo/keccak/keccak-gate.h @@ -11,13 +11,13 @@ #if defined(KECCAK_4WAY) void keccakhash_4way( void *state, const void *input ); -int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_keccak_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif void keccakhash( void *state, const void *input ); -int scanhash_keccak( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_keccak( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/keccak/keccak.c b/algo/keccak/keccak.c index 5815f0d1..1a66bc17 100644 --- a/algo/keccak/keccak.c +++ b/algo/keccak/keccak.c @@ -18,14 +18,15 @@ void keccakhash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_keccak(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_keccak( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; //const uint32_t Htarg = ptarget[7]; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t _ALIGN(32) hash64[8]; uint32_t endiandata[32]; diff --git 
a/algo/lyra2/allium-4way.c b/algo/lyra2/allium-4way.c index d1e3408e..56d6216e 100644 --- a/algo/lyra2/allium-4way.c +++ b/algo/lyra2/allium-4way.c @@ -87,7 +87,7 @@ void allium_4way_hash( void *state, const void *input ) update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 ); } -int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_allium_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); @@ -98,7 +98,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const uint32_t Htarg = ptarget[7]; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -118,7 +118,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, hash+(lane<<3), mythr, lane ); + submit_lane_solution( work, hash+(lane<<3), mythr, lane ); } } n += 4; diff --git a/algo/lyra2/allium.c b/algo/lyra2/allium.c index f46a037b..593a997f 100644 --- a/algo/lyra2/allium.c +++ b/algo/lyra2/allium.c @@ -69,7 +69,7 @@ void allium_hash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_allium( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash[8]; @@ -80,7 +80,7 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( 
opt_benchmark ) ptarget[7] = 0x3ffff; @@ -94,18 +94,14 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce, do { be32enc( &endiandata[19], nonce ); allium_hash( hash, endiandata ); - - if ( hash[7] <= Htarg && fulltest( hash, ptarget ) ) + if ( hash[7] <= Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) { - work_set_target_ratio( work, hash ); pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 1; + submit_solution( work, hash, mythr ); } nonce++; - - } while (nonce < max_nonce && !work_restart[thr_id].restart); - + } while ( nonce < max_nonce && !work_restart[thr_id].restart ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/lyra2/lyra2-gate.h b/algo/lyra2/lyra2-gate.h index e7f9e561..8a392ca2 100644 --- a/algo/lyra2/lyra2-gate.h +++ b/algo/lyra2/lyra2-gate.h @@ -19,21 +19,21 @@ bool register_lyra2rev3_algo( algo_gate_t* gate ); #if defined(LYRA2REV3_8WAY) void lyra2rev3_8way_hash( void *state, const void *input ); -int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev3_8way_ctx(); #elif defined(LYRA2REV3_4WAY) void lyra2rev3_4way_hash( void *state, const void *input ); -int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev3_4way_ctx(); #else void lyra2rev3_hash( void *state, const void *input ); -int scanhash_lyra2rev3( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev3( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev3_ctx(); @@ -52,14 +52,14 @@ bool register_lyra2rev2_algo( algo_gate_t* gate ); #if defined(LYRA2REV2_4WAY) void lyra2rev2_4way_hash( void *state, const 
void *input ); -int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev2_4way_ctx(); #else void lyra2rev2_hash( void *state, const void *input ); -int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev2_ctx(); @@ -80,21 +80,21 @@ bool init_lyra2rev2_ctx(); #if defined(LYRA2Z_8WAY) void lyra2z_8way_hash( void *state, const void *input ); -int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool lyra2z_8way_thread_init(); #elif defined(LYRA2Z_4WAY) void lyra2z_4way_hash( void *state, const void *input ); -int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool lyra2z_4way_thread_init(); #else void lyra2z_hash( void *state, const void *input ); -int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool lyra2z_thread_init(); @@ -111,14 +111,14 @@ bool lyra2z_thread_init(); #if defined(LYRA2H_4WAY) void lyra2h_4way_hash( void *state, const void *input ); -int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool lyra2h_4way_thread_init(); #else void lyra2h_hash( void *state, const void *input ); -int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2h( struct work *work, uint32_t 
max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool lyra2h_thread_init(); @@ -135,14 +135,14 @@ bool register_allium_algo( algo_gate_t* gate ); #if defined(ALLIUM_4WAY) void allium_4way_hash( void *state, const void *input ); -int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_allium_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_allium_4way_ctx(); #else void allium_hash( void *state, const void *input ); -int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_allium( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_allium_ctx(); @@ -160,14 +160,14 @@ bool register_phi2_algo( algo_gate_t* gate ); #if defined(PHI2_4WAY) void phi2_hash_4way( void *state, const void *input ); -int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_phi2_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); //void init_phi2_ctx(); #else void phi2_hash( void *state, const void *input ); -int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_phi2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_phi2_ctx(); diff --git a/algo/lyra2/lyra2h-4way.c b/algo/lyra2/lyra2h-4way.c index 4207cdd3..9b43bf7c 100644 --- a/algo/lyra2/lyra2h-4way.c +++ b/algo/lyra2/lyra2h-4way.c @@ -48,7 +48,7 @@ void lyra2h_4way_hash( void *state, const void *input ) 32, 16, 16, 16 ); } -int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); @@ -59,7 +59,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = 
first_nonce; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ptarget[7] = 0x0000ff; @@ -76,7 +76,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce, && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); diff --git a/algo/lyra2/lyra2h.c b/algo/lyra2/lyra2h.c index 5a054d79..27b5a532 100644 --- a/algo/lyra2/lyra2h.c +++ b/algo/lyra2/lyra2h.c @@ -35,7 +35,7 @@ void lyra2h_hash( void *state, const void *input ) memcpy(state, hash, 32); } -int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2h( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[8]; @@ -45,7 +45,7 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ptarget[7] = 0x0000ff; @@ -54,22 +54,19 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce, be32enc(&endiandata[i], pdata[i]); } - lyra2h_midstate( endiandata ); - + lyra2h_midstate( endiandata ); do { be32enc(&endiandata[19], nonce); lyra2h_hash( hash, endiandata ); - if (hash[7] <= Htarg && fulltest(hash, ptarget)) { - work_set_target_ratio(work, hash); + if ( hash[7] <= Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) + { pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 1; - } + submit_solution( work, hash, mythr ); + } nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); - pdata[19] = nonce; 
*hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/lyra2/lyra2re.c b/algo/lyra2/lyra2re.c index 4c8c25b1..5766b791 100644 --- a/algo/lyra2/lyra2re.c +++ b/algo/lyra2/lyra2re.c @@ -81,7 +81,7 @@ void lyra2re_hash(void *state, const void *input) memcpy(state, hashA, 32); } -int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2re( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -91,7 +91,7 @@ int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( endiandata, pdata, 20 ); @@ -100,20 +100,14 @@ int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce, do { be32enc(&endiandata[19], nonce); lyra2re_hash(hash, endiandata); - if (hash[7] <= Htarg ) - { - if ( fulltest(hash, ptarget) ) - { + if ( hash[7] <= Htarg ) + if ( fulltest(hash, ptarget) && !opt_benchmark ) + { pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - work_set_target_ratio( work, hash ); - return 1; - } - } + submit_solution( work, hash, mythr ); + } nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); - pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/lyra2/lyra2rev2-4way.c b/algo/lyra2/lyra2rev2-4way.c index 77930f48..6e394ed3 100644 --- a/algo/lyra2/lyra2rev2-4way.c +++ b/algo/lyra2/lyra2rev2-4way.c @@ -84,7 +84,7 @@ void lyra2rev2_4way_hash( void *state, const void *input ) bmw256_4way_close( &ctx.bmw, state ); } -int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] 
__attribute__ ((aligned (64))); @@ -97,7 +97,7 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const uint32_t Htarg = ptarget[7]; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -120,7 +120,7 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/lyra2/lyra2rev2.c b/algo/lyra2/lyra2rev2.c index 88578e7d..618c045d 100644 --- a/algo/lyra2/lyra2rev2.c +++ b/algo/lyra2/lyra2rev2.c @@ -40,31 +40,31 @@ void l2v2_blake256_midstate( const void* input ) void lyra2rev2_hash( void *state, const void *input ) { - lyra2v2_ctx_holder ctx __attribute__ ((aligned (64))); - memcpy( &ctx, &lyra2v2_ctx, sizeof(lyra2v2_ctx) ); - uint8_t hash[128] __attribute__ ((aligned (64))); - #define hashA hash - #define hashB hash+64 - const int midlen = 64; // bytes - const int tail = 80 - midlen; // 16 - - memcpy( &ctx.blake, &l2v2_blake_mid, sizeof l2v2_blake_mid ); + lyra2v2_ctx_holder ctx __attribute__ ((aligned (64))); + memcpy( &ctx, &lyra2v2_ctx, sizeof(lyra2v2_ctx) ); + uint8_t hash[128] __attribute__ ((aligned (64))); + #define hashA hash + #define hashB hash+64 + const int midlen = 64; // bytes + const int tail = 80 - midlen; // 16 + + memcpy( &ctx.blake, &l2v2_blake_mid, sizeof l2v2_blake_mid ); sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail ); sph_blake256_close( &ctx.blake, hashA ); sph_keccak256( &ctx.keccak, hashA, 32 ); sph_keccak256_close(&ctx.keccak, hashB); - cubehashUpdateDigest( &ctx.cube1, (byte*) hashA, - (const byte*) hashB, 32 ); + cubehashUpdateDigest( &ctx.cube1, (byte*) hashA, + 
(const byte*) hashB, 32 ); LYRA2REV2( l2v2_wholeMatrix, hashA, 32, hashA, 32, hashA, 32, 1, 4, 4 ); sph_skein256( &ctx.skein, hashA, 32 ); sph_skein256_close( &ctx.skein, hashB ); - cubehashUpdateDigest( &ctx.cube2, (byte*) hashA, - (const byte*) hashB, 32 ); + cubehashUpdateDigest( &ctx.cube2, (byte*) hashA, + (const byte*) hashB, 32 ); sph_bmw256( &ctx.bmw, hashA, 32 ); sph_bmw256_close( &ctx.bmw, hashB ); @@ -72,43 +72,37 @@ void lyra2rev2_hash( void *state, const void *input ) memcpy( state, hashB, 32 ); } -int scanhash_lyra2rev2(int thr_id, struct work *work, +int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; uint32_t endiandata[20] __attribute__ ((aligned (64))); - uint32_t hash[8] __attribute__((aligned(64))); + uint32_t hash[8] __attribute__((aligned(64))); const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; - swab32_array( endiandata, pdata, 20 ); + swab32_array( endiandata, pdata, 20 ); - l2v2_blake256_midstate( endiandata ); + l2v2_blake256_midstate( endiandata ); do { be32enc(&endiandata[19], nonce); lyra2rev2_hash(hash, endiandata); if (hash[7] <= Htarg ) - { - if( fulltest(hash, ptarget) ) - { + if( fulltest( hash, ptarget ) && !opt_benchmark ) + { pdata[19] = nonce; - work_set_target_ratio( work, hash ); - *hashes_done = pdata[19] - first_nonce; - return 1; - } - } + submit_solution( work, hash, mythr ); + } nonce++; - - } while (nonce < max_nonce && !work_restart[thr_id].restart); - + } while ( nonce < max_nonce && !work_restart[thr_id].restart ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git 
a/algo/lyra2/lyra2rev3-4way.c b/algo/lyra2/lyra2rev3-4way.c index 03261610..a810fa3d 100644 --- a/algo/lyra2/lyra2rev3-4way.c +++ b/algo/lyra2/lyra2rev3-4way.c @@ -86,7 +86,7 @@ void lyra2rev3_8way_hash( void *state, const void *input ) } -int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*8] __attribute__ ((aligned (64))); @@ -99,7 +99,7 @@ int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const uint32_t Htarg = ptarget[7]; __m256i *noncev = (__m256i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -119,7 +119,7 @@ int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 8; @@ -186,7 +186,7 @@ void lyra2rev3_4way_hash( void *state, const void *input ) bmw256_4way_close( &ctx.bmw, state ); } -int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); @@ -199,7 +199,7 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const uint32_t Htarg = ptarget[7]; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -218,7 +218,7 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work 
*work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/lyra2/lyra2rev3.c b/algo/lyra2/lyra2rev3.c index 3791f0ec..ad3ced6b 100644 --- a/algo/lyra2/lyra2rev3.c +++ b/algo/lyra2/lyra2rev3.c @@ -57,7 +57,7 @@ void lyra2rev3_hash( void *state, const void *input ) memcpy( state, hash, 32 ); } -int scanhash_lyra2rev3( int thr_id, struct work *work, +int scanhash_lyra2rev3( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -67,7 +67,7 @@ int scanhash_lyra2rev3( int thr_id, struct work *work, const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; @@ -78,28 +78,20 @@ int scanhash_lyra2rev3( int thr_id, struct work *work, casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); - l2v3_blake256_midstate( endiandata ); - do { be32enc(&endiandata[19], nonce); lyra2rev3_hash(hash, endiandata); - if (hash[7] <= Htarg ) - { - if( fulltest(hash, ptarget) ) - { - pdata[19] = nonce; - work_set_target_ratio( work, hash ); - *hashes_done = pdata[19] - first_nonce; - return 1; - } - } - nonce++; - - } while (nonce < max_nonce && !work_restart[thr_id].restart); - + if (hash[7] <= Htarg ) + if( fulltest( hash, ptarget ) && !opt_benchmark ) + { + pdata[19] = nonce; + submit_solution( work, hash, mythr ); + } + nonce++; + } while ( nonce < max_nonce && !work_restart[thr_id].restart ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; 
diff --git a/algo/lyra2/lyra2z-4way.c b/algo/lyra2/lyra2z-4way.c index 0836b15e..5f819356 100644 --- a/algo/lyra2/lyra2z-4way.c +++ b/algo/lyra2/lyra2z-4way.c @@ -44,7 +44,7 @@ void lyra2z_4way_hash( void *state, const void *input ) LYRA2Z( lyra2z_4way_matrix, state+96, 32, hash3, 32, hash3, 32, 8, 8, 8 ); } -int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); @@ -55,7 +55,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ptarget[7] = 0x0000ff; @@ -74,7 +74,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce, && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); @@ -141,7 +141,7 @@ void lyra2z_8way_hash( void *state, const void *input ) memcpy( state+224, hash7, 32 ); } -int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*8] __attribute__ ((aligned (64))); @@ -152,7 +152,7 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if ( opt_benchmark ) ptarget[7] = 0x0000ff; @@ -171,7 +171,7 @@ int 
scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce, && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 8; } while ( (n < max_nonce-8) && !work_restart[thr_id].restart); diff --git a/algo/lyra2/lyra2z.c b/algo/lyra2/lyra2z.c index fd1f4fb6..b1ab0944 100644 --- a/algo/lyra2/lyra2z.c +++ b/algo/lyra2/lyra2z.c @@ -43,7 +43,7 @@ void lyra2z_hash( void *state, const void *input ) memcpy(state, hash, 32); } -int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[8]; @@ -53,7 +53,7 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ptarget[7] = 0x0000ff; @@ -68,16 +68,14 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce, be32enc(&endiandata[19], nonce); lyra2z_hash( hash, endiandata ); - if (hash[7] <= Htarg && fulltest(hash, ptarget)) { - work_set_target_ratio(work, hash); + if ( hash[7] <= Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) + { pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 1; - } + submit_solution( work, hash, mythr ); + } nonce++; - - } while (nonce < max_nonce && !work_restart[thr_id].restart); - + } while ( nonce < max_nonce && !work_restart[thr_id].restart ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/lyra2/lyra2z330.c b/algo/lyra2/lyra2z330.c index 3fac113c..8a6eeece 100644 --- a/algo/lyra2/lyra2z330.c +++ b/algo/lyra2/lyra2z330.c @@ -15,7 +15,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height) 
memcpy(state, hash, 32); } -int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lyra2z330( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8] __attribute__ ((aligned (64))); @@ -25,7 +25,7 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ptarget[7] = 0x0000ff; @@ -38,21 +38,16 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce, do { - be32enc(&endiandata[19], nonce); + be32enc( &endiandata[19], nonce ); lyra2z330_hash( hash, endiandata, work->height ); - if ( hash[7] <= Htarg && fulltest(hash, ptarget) && !opt_benchmark ) + if ( hash[7] <= Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) { - work_set_target_ratio(work, hash); pdata[19] = nonce; - if ( submit_work( mythr, work ) ) - applog( LOG_NOTICE, "Share %d submitted by thread %d", - accepted_share_count + rejected_share_count + 1, - mythr->id ); - else - applog( LOG_WARNING, "Failed to submit share." 
); + submit_solution( work, hash, mythr ); } nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); + } while ( nonce < max_nonce && !work_restart[thr_id].restart ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/lyra2/phi2-4way.c b/algo/lyra2/phi2-4way.c index 321384f1..f4351a2f 100644 --- a/algo/lyra2/phi2-4way.c +++ b/algo/lyra2/phi2-4way.c @@ -161,7 +161,7 @@ void phi2_hash_4way( void *state, const void *input ) memcpy( state, vhash, 128 ); } -int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_phi2_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash[8]; @@ -174,7 +174,7 @@ int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if(opt_benchmark){ ptarget[7] = 0x00ff; @@ -221,7 +221,7 @@ int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/lyra2/phi2.c b/algo/lyra2/phi2.c index fedc9f2f..cad10b31 100644 --- a/algo/lyra2/phi2.c +++ b/algo/lyra2/phi2.c @@ -92,7 +92,7 @@ void phi2_hash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_phi2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash[8]; @@ -102,7 +102,7 @@ int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = 
first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated if(opt_benchmark){ ptarget[7] = 0x00ff; @@ -111,30 +111,21 @@ int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce, phi2_has_roots = false; for ( int i=0; i < 36; i++ ) { - be32enc(&endiandata[i], pdata[i]); - if (i >= 20 && pdata[i]) phi2_has_roots = true; + be32enc(&endiandata[i], pdata[i]); + if ( i >= 20 && pdata[i] ) phi2_has_roots = true; } do { be32enc( &endiandata[19], n ); phi2_hash( hash, endiandata ); - - if ( hash[7] < Htarg && fulltest( hash, ptarget ) ) - { - pdata[19] = n; - work_set_target_ratio( work, hash ); - if ( submit_work( mythr, work ) ) - applog( LOG_NOTICE, "Share %d submitted by thread %d.", - accepted_share_count + rejected_share_count + 1, - thr_id ); - else - applog( LOG_WARNING, "Failed to submit share." ); - *hashes_done = n - first_nonce + 1; - } + if ( hash[7] < Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) + { + pdata[19] = n; + submit_solution( work, hash, mythr ); + } n++; - } while ( n < max_nonce && !work_restart[thr_id].restart ); - *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; diff --git a/algo/m7m.c b/algo/m7m.c index c913cb32..93872613 100644 --- a/algo/m7m.c +++ b/algo/m7m.c @@ -144,7 +144,7 @@ void init_m7m_ctx() #define NM7M 5 #define SW_DIVS 5 #define M7_MIDSTATE_LEN 76 -int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce, +int scanhash_m7m_hash( struct work* work, uint64_t max_nonce, unsigned long *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -154,7 +154,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce, uint32_t hash[8] __attribute__((aligned(64))); uint8_t bhash[7][64] __attribute__((aligned(64))); uint32_t n = pdata[19] - 1; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t usw_, mpzscale; const 
uint32_t first_nonce = pdata[19]; char data_str[161], hash_str[65], target_str[65]; diff --git a/algo/nist5/nist5-4way.c b/algo/nist5/nist5-4way.c index 5556fa65..e0d2f093 100644 --- a/algo/nist5/nist5-4way.c +++ b/algo/nist5/nist5-4way.c @@ -35,7 +35,7 @@ void nist5hash_4way( void *out, const void *input ) blake512_4way( &ctx_blake, input, 80 ); blake512_4way_close( &ctx_blake, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); init_groestl( &ctx_groestl, 64 ); update_and_final_groestl( &ctx_groestl, (char*)hash0, @@ -50,7 +50,7 @@ void nist5hash_4way( void *out, const void *input ) update_and_final_groestl( &ctx_groestl, (char*)hash3, (const char*)hash3, 512 ); - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); jh512_4way_init( &ctx_jh ); jh512_4way( &ctx_jh, vhash, 64 ); @@ -65,8 +65,8 @@ void nist5hash_4way( void *out, const void *input ) skein512_4way_close( &ctx_skein, out ); } -int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_nist5_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*16] __attribute__ ((aligned (64))); uint32_t *hash7 = &(hash[25]); @@ -78,9 +78,8 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, @@ -100,7 +99,7 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + 
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); // precalc midstate // blake512_4way_init( &ctx_mid ); @@ -124,22 +123,19 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( ( hash7[ lane ] & mask ) == 0 ) { mm256_extract_lane_4x64( lane_hash, hash, lane, 256 ); - if ( fulltest( lane_hash, ptarget ) ) + if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - nonces[ num_found++ ] = n + lane; - work_set_target_ratio( work, lane_hash ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } } - *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/nist5/nist5-gate.h b/algo/nist5/nist5-gate.h index 6bf0dab8..80828b7d 100644 --- a/algo/nist5/nist5-gate.h +++ b/algo/nist5/nist5-gate.h @@ -12,15 +12,15 @@ void nist5hash_4way( void *state, const void *input ); -int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_nist5_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #else void nist5hash( void *state, const void *input ); -int scanhash_nist5( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_nist5( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_nist5_ctx(); #endif diff --git a/algo/nist5/nist5.c b/algo/nist5/nist5.c index 0bbc9a9d..431fb714 100644 --- a/algo/nist5/nist5.c +++ b/algo/nist5/nist5.c @@ -81,8 +81,8 @@ void nist5hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_nist5(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_nist5( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct 
thr_info *mythr) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash64[8] __attribute__((aligned(32))); @@ -90,6 +90,7 @@ int scanhash_nist5(int thr_id, struct work *work, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c index 3847b920..9ec6e19b 100644 --- a/algo/nist5/zr5.c +++ b/algo/nist5/zr5.c @@ -144,8 +144,8 @@ static const int arrOrder[][4] = memcpy(state, hash, 32); } -int scanhash_zr5( int thr_id, struct work *work, - uint32_t max_nonce, unsigned long *hashes_done) +int scanhash_zr5( struct work *work, uint32_t max_nonce, + unsigned long *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -154,6 +154,7 @@ int scanhash_zr5( int thr_id, struct work *work, const uint32_t version = pdata[0] & (~POK_DATA_MASK); const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated memcpy(tmpdata, pdata, 80); diff --git a/algo/quark/anime-4way.c b/algo/quark/anime-4way.c index d68cd37e..f493e9c9 100644 --- a/algo/quark/anime-4way.c +++ b/algo/quark/anime-4way.c @@ -160,7 +160,7 @@ void anime_4way_hash( void *state, const void *input ) mm256_dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 ); } -int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_anime_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -170,7 +170,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = 
mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, @@ -209,7 +209,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce, && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); diff --git a/algo/quark/anime-gate.h b/algo/quark/anime-gate.h index 5dfbfece..fdf34b4c 100644 --- a/algo/quark/anime-gate.h +++ b/algo/quark/anime-gate.h @@ -13,14 +13,14 @@ bool register_anime_algo( algo_gate_t* gate ); #if defined(ANIME_4WAY) void anime_4way_hash( void *state, const void *input ); -int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_anime_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_anime_4way_ctx(); #endif void anime_hash( void *state, const void *input ); -int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_anime( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_anime_ctx(); diff --git a/algo/quark/anime.c b/algo/quark/anime.c index 3f33485b..545f2730 100644 --- a/algo/quark/anime.c +++ b/algo/quark/anime.c @@ -119,7 +119,7 @@ void anime_hash( void *state, const void *input ) memcpy( state, hash, 32 ); } -int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_anime( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[8] __attribute__ ((aligned (64))); @@ -128,7 +128,7 @@ int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated 
const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, diff --git a/algo/quark/hmq1725-4way.c b/algo/quark/hmq1725-4way.c index 30263a90..dfb681a4 100644 --- a/algo/quark/hmq1725-4way.c +++ b/algo/quark/hmq1725-4way.c @@ -47,7 +47,7 @@ typedef union _hmq1725_4way_context_overlay hmq1725_4way_context_overlay; extern void hmq1725_4way_hash(void *state, const void *input) { -// why so big? only really need 8, haval thing uses 16. +// why so big? only really need 16. uint32_t hash0 [32] __attribute__ ((aligned (64))); uint32_t hash1 [32] __attribute__ ((aligned (64))); uint32_t hash2 [32] __attribute__ ((aligned (64))); @@ -570,11 +570,11 @@ extern void hmq1725_4way_hash(void *state, const void *input) memcpy(state, vhash, 32<<2 ); } -int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); -// uint32_t *hash7 = &(hash[7<<2]); +// uint32_t *hash7 = &(hash[25]); // uint32_t lane_hash[8]; uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; @@ -582,7 +582,7 @@ int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -604,7 +604,7 @@ int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( (hash+(i<<3)), ptarget ) && !opt_benchmark ) { pdata[19] = n + i; - submit_solution( work, (hash+(i<<3)), mythr, i ); + submit_lane_solution( work, (hash+(i<<3)), mythr, i ); } } n += 4; diff --git a/algo/quark/hmq1725-gate.h b/algo/quark/hmq1725-gate.h index 9521cd27..4f77fd0a 100644 
--- a/algo/quark/hmq1725-gate.h +++ b/algo/quark/hmq1725-gate.h @@ -13,13 +13,13 @@ bool register_hmq1725_algo( algo_gate_t* gate ); #if defined(HMQ1725_4WAY) void hmq1725_4way_hash( void *state, const void *input ); -int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #else void hmq1725hash( void *state, const void *input ); -int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_hmq1725( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_hmq1725_ctx(); diff --git a/algo/quark/hmq1725.c b/algo/quark/hmq1725.c index 66d081b6..8b719111 100644 --- a/algo/quark/hmq1725.c +++ b/algo/quark/hmq1725.c @@ -298,7 +298,7 @@ extern void hmq1725hash(void *state, const void *input) memcpy(state, hashA, 32); } -int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_hmq1725( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { // uint32_t endiandata[32] __attribute__((aligned(64))); @@ -308,7 +308,7 @@ int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated //const uint32_t Htarg = ptarget[7]; //we need bigendian data... 
diff --git a/algo/quark/quark-4way.c b/algo/quark/quark-4way.c index a2237bfa..38e1e127 100644 --- a/algo/quark/quark-4way.c +++ b/algo/quark/quark-4way.c @@ -165,7 +165,7 @@ void quark_4way_hash( void *state, const void *input ) casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask ); } -int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_quark_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -177,7 +177,7 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated mm256_bswap_intrlv80_4x64( vdata, pdata ); do @@ -195,7 +195,7 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, lane_hash, mythr, i ); + submit_lane_solution( work, lane_hash, mythr, i ); } } n += 4; diff --git a/algo/quark/quark-gate.h b/algo/quark/quark-gate.h index 20b57503..e97b20dc 100644 --- a/algo/quark/quark-gate.h +++ b/algo/quark/quark-gate.h @@ -13,14 +13,14 @@ bool register_quark_algo( algo_gate_t* gate ); #if defined(QUARK_4WAY) void quark_4way_hash( void *state, const void *input ); -int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_quark_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_quark_4way_ctx(); #endif void quark_hash( void *state, const void *input ); -int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_quark( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_quark_ctx(); diff --git 
a/algo/quark/quark.c b/algo/quark/quark.c index 6f3ca6c3..638e6297 100644 --- a/algo/quark/quark.c +++ b/algo/quark/quark.c @@ -172,7 +172,7 @@ void quark_hash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_quark( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -181,7 +181,7 @@ int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( endiandata, pdata, 20 ); diff --git a/algo/qubit/deep-2way.c b/algo/qubit/deep-2way.c index ad252d9c..00609324 100644 --- a/algo/qubit/deep-2way.c +++ b/algo/qubit/deep-2way.c @@ -39,7 +39,7 @@ void deep_2way_hash( void *output, const void *input ) memcpy( &ctx, &deep_2way_ctx, sizeof(deep_2way_ctx) ); luffa_2way_update( &ctx.luffa, input + (64<<1), 16 ); luffa_2way_close( &ctx.luffa, vhash ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -63,7 +63,7 @@ void deep_2way_hash( void *output, const void *input ) memcpy( output+32, hash1, 32 ); } -int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce, +int scanhash_deep_2way( struct work *work,uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -74,7 +74,7 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; uint32_t *noncep = vdata + 32+3; // 4*8 + 3 - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg 
is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -86,7 +86,7 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce, casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 ); + mm256_intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 ); luffa_2way_init( &deep_2way_ctx.luffa, 512 ); luffa_2way_update( &deep_2way_ctx.luffa, vdata, 64 ); @@ -106,13 +106,13 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce, if ( fulltest( hash, ptarget) && !opt_benchmark ) { pdata[19] = n; - submit_solution( work, hash, mythr, 0 ); + submit_lane_solution( work, hash, mythr, 0 ); } if ( !( (hash+8)[7] & mask ) ) if ( fulltest( hash+8, ptarget) && !opt_benchmark ) { pdata[19] = n+1; - submit_solution( work, hash+8, mythr, 1 ); + submit_lane_solution( work, hash+8, mythr, 1 ); } n += 2; } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); diff --git a/algo/qubit/deep-gate.h b/algo/qubit/deep-gate.h index ded8f28e..1d1c9326 100644 --- a/algo/qubit/deep-gate.h +++ b/algo/qubit/deep-gate.h @@ -13,14 +13,14 @@ bool register_deep_algo( algo_gate_t* gate ); #if defined(DEEP_2WAY) void deep_2way_hash( void *state, const void *input ); -int scanhash_deep_2way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_deep_2way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_deep_2way_ctx(); #endif void deep_hash( void *state, const void *input ); -int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_deep( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_deep_ctx(); diff --git a/algo/qubit/deep.c b/algo/qubit/deep.c index 9dc24a28..b48f0d0b 100644 --- a/algo/qubit/deep.c +++ b/algo/qubit/deep.c @@ -71,7 +71,7 @@ void 
deep_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_deep( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -80,7 +80,7 @@ int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; diff --git a/algo/qubit/qubit-2way.c b/algo/qubit/qubit-2way.c index dc7d4ad0..038cec03 100644 --- a/algo/qubit/qubit-2way.c +++ b/algo/qubit/qubit-2way.c @@ -41,7 +41,7 @@ void qubit_2way_hash( void *output, const void *input ) memcpy( &ctx, &qubit_2way_ctx, sizeof(qubit_2way_ctx) ); luffa_2way_update( &ctx.luffa, input + (64<<1), 16 ); luffa_2way_close( &ctx.luffa, vhash ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -55,9 +55,9 @@ void qubit_2way_hash( void *output, const void *input ) sph_shavite512( &ctx.shavite, hash1, 64 ); sph_shavite512_close( &ctx.shavite, hash1 ); - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); update_final_echo( &ctx.echo, (BitSequence *)hash0, (const BitSequence *) hash0, 512 ); @@ -69,7 +69,7 @@ void qubit_2way_hash( void *output, const void *input ) memcpy( output+32, hash1, 32 ); } -int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce, +int scanhash_qubit_2way( 
struct work *work,uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -80,7 +80,7 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; uint32_t *noncep = vdata + 32+3; // 4*8 + 3 - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -92,7 +92,7 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce, casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 ); + mm256_intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 ); luffa_2way_init( &qubit_2way_ctx.luffa, 512 ); luffa_2way_update( &qubit_2way_ctx.luffa, vdata, 64 ); @@ -111,13 +111,13 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce, if ( fulltest( hash, ptarget) && !opt_benchmark ) { pdata[19] = n; - submit_solution( work, hash, mythr, 0 ); + submit_lane_solution( work, hash, mythr, 0 ); } if ( !( (hash+8)[7] & mask ) ) if ( fulltest( hash+8, ptarget) && !opt_benchmark ) { pdata[19] = n+1; - submit_solution( work, hash+8, mythr, 1 ); + submit_lane_solution( work, hash+8, mythr, 1 ); } n += 2; } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); diff --git a/algo/qubit/qubit-gate.h b/algo/qubit/qubit-gate.h index 98af09ab..741a71ac 100644 --- a/algo/qubit/qubit-gate.h +++ b/algo/qubit/qubit-gate.h @@ -13,14 +13,14 @@ bool register_qubit_algo( algo_gate_t* gate ); #if defined(QUBIT_2WAY) void qubit_2way_hash( void *state, const void *input ); -int scanhash_qubit_2way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_qubit_2way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info 
*mythr ); void init_qubit_2way_ctx(); #endif void qubit_hash( void *state, const void *input ); -int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_qubit( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_qubit_ctx(); diff --git a/algo/qubit/qubit.c b/algo/qubit/qubit.c index d90eeb35..fc953beb 100644 --- a/algo/qubit/qubit.c +++ b/algo/qubit/qubit.c @@ -83,7 +83,7 @@ void qubit_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_qubit( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -92,7 +92,7 @@ int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; diff --git a/algo/ripemd/lbry-4way.c b/algo/ripemd/lbry-4way.c index 383981ed..16486701 100644 --- a/algo/ripemd/lbry-4way.c +++ b/algo/ripemd/lbry-4way.c @@ -75,7 +75,7 @@ void lbry_8way_hash( void* output, const void* input ) sha256_8way_close( &ctx_sha256, output ); } -int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lbry_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*8] __attribute__ ((aligned (64))); @@ -89,7 +89,7 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; uint32_t edata[32] __attribute__ ((aligned (64))); __m256i *noncev = (__m256i*)vdata + 27; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; 
// thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -122,7 +122,7 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[27] = n + i; - submit_solution( work, lane_hash, mythr, i ); + submit_lane_solution( work, lane_hash, mythr, i ); } } n += 8; diff --git a/algo/ripemd/lbry-gate.h b/algo/ripemd/lbry-gate.h index cdf2f19c..e6d92632 100644 --- a/algo/ripemd/lbry-gate.h +++ b/algo/ripemd/lbry-gate.h @@ -21,19 +21,19 @@ bool register_lbry_algo( algo_gate_t* gate ); #if defined(LBRY_8WAY) void lbry_8way_hash( void *state, const void *input ); -int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lbry_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); /* #elif defined(LBRY_4WAY) void lbry_4way_hash( void *state, const void *input ); -int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lbry_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done ); */ #else void lbry_hash( void *state, const void *input ); -int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lbry( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif #endif diff --git a/algo/ripemd/lbry.c b/algo/ripemd/lbry.c index b453c734..57f9a820 100644 --- a/algo/ripemd/lbry.c +++ b/algo/ripemd/lbry.c @@ -47,7 +47,7 @@ void lbry_hash(void* output, const void* input) memcpy( output, hashA, 32 ); } -int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_lbry( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t *pdata = work->data; @@ -55,7 +55,7 @@ int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[27] - 1; const uint32_t first_nonce = pdata[27]; const uint32_t Htarg = ptarget[7]; - 
/* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t hash64[8] __attribute__((aligned(64))); uint32_t endiandata[32] __attribute__ ((aligned (64))); diff --git a/algo/neoscrypt/neoscrypt.c b/algo/scrypt/neoscrypt.c similarity index 99% rename from algo/neoscrypt/neoscrypt.c rename to algo/scrypt/neoscrypt.c index ea0fe0f6..3349afb8 100644 --- a/algo/neoscrypt/neoscrypt.c +++ b/algo/scrypt/neoscrypt.c @@ -1042,8 +1042,8 @@ static bool fulltest_le(const uint *hash, const uint *target) return(rc); } -int scanhash_neoscrypt( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ) +int scanhash_neoscrypt( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -1051,6 +1051,7 @@ int scanhash_neoscrypt( int thr_id, struct work *work, uint32_t _ALIGN(64) hash[8]; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated while (pdata[19] < max_nonce && !work_restart[thr_id].restart) { diff --git a/algo/pluck.c b/algo/scrypt/pluck.c similarity index 99% rename from algo/pluck.c rename to algo/scrypt/pluck.c index 5ade09b6..01f1c9f8 100644 --- a/algo/pluck.c +++ b/algo/scrypt/pluck.c @@ -444,7 +444,7 @@ void pluck_hash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const i memcpy(hash, hashbuffer, 32); } -int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_pluck( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -452,9 +452,9 @@ int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce, uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) hash[8]; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); uint32_t n = first_nonce; 
- /* int */ thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) diff --git a/algo/scrypt.c b/algo/scrypt/scrypt.c similarity index 99% rename from algo/scrypt.c rename to algo/scrypt/scrypt.c index 5d570d81..387afbb7 100644 --- a/algo/scrypt.c +++ b/algo/scrypt/scrypt.c @@ -695,7 +695,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input, } #endif /* HAVE_SCRYPT_6WAY */ -extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce, +extern int scanhash_scrypt( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -704,7 +704,7 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce, uint32_t midstate[8]; uint32_t n = pdata[19] - 1; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated int throughput = scrypt_best_throughput(); int i; diff --git a/algo/scryptjane/scrypt-jane.c b/algo/scryptjane/scrypt-jane.c index 62de32c2..6afdc3e4 100644 --- a/algo/scryptjane/scrypt-jane.c +++ b/algo/scryptjane/scrypt-jane.c @@ -135,8 +135,8 @@ unsigned char GetNfactor(unsigned int nTimestamp, unsigned int ntime) { } -int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_scryptjane( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { scrypt_aligned_alloc YX, V; uint8_t *X, *Y; @@ -150,6 +150,7 @@ int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce, uint32_t _ALIGN(64) endiandata[20]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ptarget[7] = 0x00ff; diff --git a/algo/sha/sha2.c b/algo/sha/sha2.c index ac6be69c..5ab3ee85 100644 --- a/algo/sha/sha2.c +++ b/algo/sha/sha2.c @@ -586,8 +586,8 @@ static inline int scanhash_sha256d_8way(int 
thr_id, struct work *work, #endif /* HAVE_SHA256_8WAY */ -int scanhash_sha256d(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_sha256d( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -598,7 +598,8 @@ int scanhash_sha256d(int thr_id, struct work *work, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - + int thr_id = mythr->id; // thr_id arg is deprecated + #ifdef HAVE_SHA256_8WAY if (sha256_use_8way()) return scanhash_sha256d_8way(thr_id, work, @@ -621,16 +622,14 @@ int scanhash_sha256d(int thr_id, struct work *work, do { data[3] = ++n; sha256d_ms(hash, data, midstate, prehash); - if (unlikely(swab32(hash[7]) <= Htarg)) { + if (unlikely(swab32(hash[7]) <= Htarg)) + { pdata[19] = data[3]; sha256d_80_swap(hash, pdata); - if (fulltest(hash, ptarget)) { - *hashes_done = n - first_nonce + 1; - return 1; - } + if ( fulltest(hash, ptarget) && !opt_benchmark ) + submit_solution( work, hash, mythr ); } } while (likely(n < max_nonce && !work_restart[thr_id].restart)); - *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; diff --git a/algo/sha/sha256q-4way.c b/algo/sha/sha256q-4way.c index 0c58ceeb..b908400d 100644 --- a/algo/sha/sha256q-4way.c +++ b/algo/sha/sha256q-4way.c @@ -31,7 +31,7 @@ void sha256q_8way_hash( void* output, const void* input ) sha256_8way_close( &ctx, output ); } -int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*8] __attribute__ ((aligned (64))); @@ -42,7 +42,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 19; // aligned - /* int */ thr_id = 
mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint64_t htmax[] = { 0, 0xF, @@ -85,7 +85,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 8; @@ -124,7 +124,7 @@ void sha256q_4way_hash( void* output, const void* input ) sha256_4way_close( &ctx, output ); } -int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*4] __attribute__ ((aligned (64))); @@ -137,7 +137,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint64_t htmax[] = { 0, 0xF, @@ -173,7 +173,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/sha/sha256q.c b/algo/sha/sha256q.c index 8e9007bf..25f7d2d0 100644 --- a/algo/sha/sha256q.c +++ b/algo/sha/sha256q.c @@ -40,7 +40,7 @@ void sha256q_hash( void* output, const void* input ) memcpy( output, hash, 32 ); } -int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -54,7 +54,7 @@ int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce, uint32_t 
hash64[8] __attribute__((aligned(32))); #endif uint32_t endiandata[32]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, @@ -91,22 +91,13 @@ int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = ++n; be32enc(&endiandata[19], n); sha256q_hash( hash64, endiandata ); - if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) ) - { - work_set_target_ratio( work, hash64 ); - if ( submit_work( mythr, work ) ) - applog( LOG_NOTICE, "Share %d submitted by thread %d.", - accepted_share_count + rejected_share_count + 1, - thr_id ); - else - applog( LOG_WARNING, "Failed to submit share." ); - *hashes_done = n - first_nonce + 1; - } + if ( !( hash64[7] & mask ) ) + if ( fulltest( hash64, ptarget ) && !opt_benchmark ) + submit_solution( work, hash64, mythr ); } while ( n < max_nonce && !work_restart[thr_id].restart ); break; } } - *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; diff --git a/algo/sha/sha256t-4way.c b/algo/sha/sha256t-4way.c index 6b8cf62b..adc58cd9 100644 --- a/algo/sha/sha256t-4way.c +++ b/algo/sha/sha256t-4way.c @@ -33,7 +33,7 @@ void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx, sha256_11way_close( &ctx, outx, outy, outz ); } -int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t datax[20*8] __attribute__ ((aligned (64))); @@ -52,7 +52,7 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce, __m256i *noncex = (__m256i*) datax + 19; __m64 *noncey = (__m64*) datay + 19; uint32_t *noncez = (uint32_t*)dataz + 19; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated int i; const uint64_t htmax[] = { 0, 0xF, @@ -103,7 +103,7 @@ int scanhash_sha256t_11way( int thr_id, 
struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) ) { pdata[19] = n + i; - submit_solution( work, lane_hash, mythr, i ); + submit_lane_solution( work, lane_hash, mythr, i ); } } @@ -115,14 +115,14 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) ) { pdata[19] = n + 8 + i; - submit_solution( work, lane_hash, mythr, i+8 ); + submit_lane_solution( work, lane_hash, mythr, i+8 ); } } if ( !(hashz[7] & mask ) && fulltest( hashz, ptarget ) ) { pdata[19] = n+10; - submit_solution( work, hashz, mythr, 10 ); + submit_lane_solution( work, hashz, mythr, 10 ); } n += 11; @@ -158,7 +158,7 @@ void sha256t_8way_hash( void* output, const void* input ) sha256_8way_close( &ctx, output ); } -int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*8] __attribute__ ((aligned (64))); @@ -171,7 +171,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint64_t htmax[] = { 0, 0xF, @@ -208,7 +208,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 8; @@ -243,7 +243,7 @@ void sha256t_4way_hash( void* output, const void* input ) sha256_4way_close( &ctx, output ); } -int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { 
uint32_t vdata[20*4] __attribute__ ((aligned (64))); @@ -256,7 +256,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m128i *noncev = (__m128i*)vdata + 19; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint64_t htmax[] = { 0, 0xF, @@ -291,7 +291,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/sha/sha256t-gate.h b/algo/sha/sha256t-gate.h index 9b85eb13..30b2766b 100644 --- a/algo/sha/sha256t-gate.h +++ b/algo/sha/sha256t-gate.h @@ -19,28 +19,28 @@ bool register_sha256q_algo( algo_gate_t* gate ); #if defined(SHA256T_8WAY) void sha256t_8way_hash( void *output, const void *input ); -int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void sha256q_8way_hash( void *output, const void *input ); -int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif #if defined(SHA256T_4WAY) void sha256t_4way_hash( void *output, const void *input ); -int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void sha256q_4way_hash( void *output, const void *input ); -int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct 
thr_info *mythr ); #endif void sha256t_hash( void *output, const void *input ); -int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void sha256q_hash( void *output, const void *input ); -int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256q( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/sha/sha256t.c b/algo/sha/sha256t.c index 6917ff79..bb401d04 100644 --- a/algo/sha/sha256t.c +++ b/algo/sha/sha256t.c @@ -36,7 +36,7 @@ void sha256t_hash( void* output, const void* input ) memcpy( output, hash, 32 ); } -int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sha256t( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -50,7 +50,7 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce, uint32_t hash64[8] __attribute__((aligned(32))); #endif uint32_t endiandata[32]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, @@ -87,22 +87,13 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = ++n; be32enc(&endiandata[19], n); sha256t_hash( hash64, endiandata ); - if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) ) - { - *hashes_done = n - first_nonce + 1; - work_set_target_ratio( work, hash64 ); - if ( submit_work( mythr, work ) ) - applog( LOG_NOTICE, "Share %d submitted by thread %d.", - accepted_share_count + rejected_share_count + 1, - thr_id ); - else - applog( LOG_WARNING, "Failed to submit share." 
); - } + if ( !(hash64[7] & mask) ) + if ( fulltest( hash64, ptarget ) && !opt_benchmark ) + submit_solution( work, hash64, mythr ); } while ( n < max_nonce && !work_restart[thr_id].restart ); break; } } - *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; diff --git a/algo/shavite/shavite.c b/algo/shavite/shavite.c index 6891e28a..9ad98440 100644 --- a/algo/shavite/shavite.c +++ b/algo/shavite/shavite.c @@ -31,11 +31,12 @@ extern void inkhash(void *state, const void *input) */ } -int scanhash_ink(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_ink( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; diff --git a/algo/skein/skein-4way.c b/algo/skein/skein-4way.c index e4534f28..d81e2af8 100644 --- a/algo/skein/skein-4way.c +++ b/algo/skein/skein-4way.c @@ -13,7 +13,6 @@ void skeinhash_4way( void *state, const void *input ) { uint64_t vhash64[8*4] __attribute__ ((aligned (64))); - uint32_t vhash32[16*4] __attribute__ ((aligned (64))); skein512_4way_context ctx_skein; #if defined(__SHA__) uint32_t hash0[16] __attribute__ ((aligned (64))); @@ -22,6 +21,7 @@ void skeinhash_4way( void *state, const void *input ) uint32_t hash3[16] __attribute__ ((aligned (64))); SHA256_CTX ctx_sha256; #else + uint32_t vhash32[16*4] __attribute__ ((aligned (64))); sha256_4way_context ctx_sha256; #endif @@ -58,7 +58,7 @@ void skeinhash_4way( void *state, const void *input ) #endif } -int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t vdata[20*4] __attribute__ ((aligned (64))); @@ -71,7 +71,7 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = 
pdata[19]; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated mm256_bswap_intrlv80_4x64( vdata, pdata ); do @@ -88,7 +88,7 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/skein/skein-gate.h b/algo/skein/skein-gate.h index bfa169d2..ac7f281f 100644 --- a/algo/skein/skein-gate.h +++ b/algo/skein/skein-gate.h @@ -11,13 +11,13 @@ void skeinhash_4way( void *output, const void *input ); -int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif void skeinhash( void *output, const void *input ); -int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/skein/skein.c b/algo/skein/skein.c index b6b070f7..c4934067 100644 --- a/algo/skein/skein.c +++ b/algo/skein/skein.c @@ -21,7 +21,7 @@ void skeinhash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -31,7 +31,7 @@ int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( endiandata, pdata, 20 ); diff --git 
a/algo/skein/skein2-4way.c b/algo/skein/skein2-4way.c index fef6813e..4989ebcc 100644 --- a/algo/skein/skein2-4way.c +++ b/algo/skein/skein2-4way.c @@ -19,7 +19,7 @@ void skein2hash_4way( void *output, const void *input ) skein512_4way_close( &ctx, output ); } -int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein2_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[8*4] __attribute__ ((aligned (64))); @@ -31,7 +31,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated mm256_bswap_intrlv80_4x64( vdata, pdata ); do @@ -49,7 +49,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/skein/skein2-gate.h b/algo/skein/skein2-gate.h index 3e649365..5f3759b0 100644 --- a/algo/skein/skein2-gate.h +++ b/algo/skein/skein2-gate.h @@ -9,12 +9,12 @@ #if defined(SKEIN2_4WAY) void skein2hash_4way( void *output, const void *input ); -int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein2_4way( struct work *work, uint32_t max_nonce, uint64_t* hashes_done, struct thr_info *mythr ); #endif void skein2hash( void *output, const void *input ); -int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/skein/skein2.c b/algo/skein/skein2.c index 60b32cff..93f3c073 100644 --- a/algo/skein/skein2.c +++ 
b/algo/skein/skein2.c @@ -34,7 +34,7 @@ void skein2hash(void *output, const void *input) } -int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_skein2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; @@ -44,7 +44,7 @@ int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated swab32_array( endiandata, pdata, 20 ); diff --git a/algo/whirlpool/whirlpool-4way.c b/algo/whirlpool/whirlpool-4way.c deleted file mode 100644 index c182b413..00000000 --- a/algo/whirlpool/whirlpool-4way.c +++ /dev/null @@ -1,105 +0,0 @@ -#include "whirlpool-gate.h" - -#if defined(__AVX2__) - -#include -#include -#include -#include -#include "sph_whirlpool.h" -#include "whirlpool-hash-4way.h" - -static __thread whirlpool_4way_context whirl_mid; - -void whirlpool_hash_4way( void *state, const void *input ) -{ - uint64_t hash0[8] __attribute__ ((aligned (64))); - uint64_t hash1[8] __attribute__ ((aligned (64))); - uint64_t hash2[8] __attribute__ ((aligned (64))); - uint64_t hash3[8] __attribute__ ((aligned (64))); - uint64_t vhash[8*4] __attribute__ ((aligned (64))); - const int midlen = 64; - const int tail = 80 - midlen; - whirlpool_4way_context ctx; - - memcpy( &ctx, &whirl_mid, sizeof whirl_mid ); - whirlpool1_4way( &ctx, input + (midlen<<2), tail ); - whirlpool1_4way_close( &ctx, vhash); - -// whirlpool1_4way_init( &ctx ); -// whirlpool1_4way( &ctx, input, 80 ); -// whirlpool1_4way_close( &ctx, vhash); - - whirlpool1_4way_init( &ctx ); - whirlpool1_4way( &ctx, vhash, 64 ); - whirlpool1_4way_close( &ctx, vhash); - - whirlpool1_4way_init( &ctx ); - whirlpool1_4way( &ctx, vhash, 64 ); - whirlpool1_4way_close( &ctx, vhash); - - whirlpool1_4way_init( &ctx ); - 
whirlpool1_4way( &ctx, vhash, 64 ); - whirlpool1_4way_close( &ctx, vhash); - - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - - memcpy( state , hash0, 32 ); - memcpy( state+32, hash1, 32 ); - memcpy( state+64, hash2, 32 ); - memcpy( state+96, hash3, 32 ); -} - -int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ) -{ - uint32_t hash[4*8] __attribute__ ((aligned (64))); - uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t _ALIGN(128) endiandata[20]; - uint32_t* pdata = work->data; - uint32_t* ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; - uint32_t *noncep = vdata + 73; // 9*8 + 1 - - if (opt_benchmark) - ((uint32_t*)ptarget)[7] = 0x0000ff; - - for (int i=0; i < 19; i++) - be32enc(&endiandata[i], pdata[i]); - - uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); - - // midstate - whirlpool1_4way_init( &whirl_mid ); - whirlpool1_4way( &whirl_mid, vdata, 64 ); - - do { - const uint32_t Htarg = ptarget[7]; - be32enc( noncep, n ); - be32enc( noncep+2, n+1 ); - be32enc( noncep+4, n+2 ); - be32enc( noncep+6, n+3 ); - pdata[19] = n; - - whirlpool_hash_4way( hash, vdata ); - - for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) - { - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); - } - n += 4; - - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); - - *hashes_done = n - first_nonce + 1; - return num_found; -} - -#endif diff --git a/algo/whirlpool/whirlpool-gate.h b/algo/whirlpool/whirlpool-gate.h index adf29948..f82e1792 100644 --- a/algo/whirlpool/whirlpool-gate.h +++ b/algo/whirlpool/whirlpool-gate.h @@ -14,14 +14,14 @@ void whirlpool_hash_4way(void *state, const void *input); -int scanhash_whirlpool_4way( 
int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_whirlpool_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #else void whirlpool_hash( void *state, const void *input ); -int scanhash_whirlpool( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_whirlpool( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_whirlpool_ctx(); #endif diff --git a/algo/whirlpool/whirlpool.c b/algo/whirlpool/whirlpool.c index 465e0b3e..18f38c42 100644 --- a/algo/whirlpool/whirlpool.c +++ b/algo/whirlpool/whirlpool.c @@ -57,14 +57,15 @@ void whirlpool_midstate( const void* input ) } -int scanhash_whirlpool( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_whirlpool( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) endiandata[20]; uint32_t* pdata = work->data; uint32_t* ptarget = work->target; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce - 1; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/algo/whirlpool/whirlpoolx.c b/algo/whirlpool/whirlpoolx.c index e0992729..5c82acde 100644 --- a/algo/whirlpool/whirlpoolx.c +++ b/algo/whirlpool/whirlpoolx.c @@ -24,14 +24,15 @@ void whirlpoolx_hash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_whirlpoolx( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_whirlpoolx( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) endiandata[20]; uint32_t* pdata = work->data; uint32_t* ptarget = work->target; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce - 1; + int thr_id = mythr->id; // thr_id arg is deprecated if (opt_benchmark) 
((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/algo/x11/c11-4way.c b/algo/x11/c11-4way.c index 97244157..eec3d3cd 100644 --- a/algo/x11/c11-4way.c +++ b/algo/x11/c11-4way.c @@ -69,7 +69,7 @@ void c11_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 3 Groestl update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -81,7 +81,7 @@ void c11_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // 4way - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); // 4 JH jh512_4way( &ctx.jh, vhash, 64 ); @@ -96,16 +96,16 @@ void c11_4way_hash( void *state, const void *input ) skein512_4way_close( &ctx.skein, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 7 Luffa - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); - mm256_interleave_2x128( vhashB, hash2, hash3, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhashB, hash2, hash3, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 ); // 8 Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -133,13 +133,13 @@ void c11_4way_hash( void *state, const void *input ) sph_shavite512_close( &ctx.shavite, hash3 ); // 10 Simd - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); - mm256_interleave_2x128( vhashB, 
hash2, hash3, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhashB, hash2, hash3, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 ); // 11 Echo update_final_echo( &ctx.echo, (BitSequence *)hash0, @@ -160,8 +160,8 @@ void c11_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_c11_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -170,8 +170,7 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, @@ -183,7 +182,7 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); for (int m=0; m < 6; m++) if (Htarg <= htmax[m]) @@ -201,20 +200,18 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce, for ( int i = 0; i < 4; i++ ) if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), 
ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/c11-gate.h b/algo/x11/c11-gate.h index 4983c518..e4f88a56 100644 --- a/algo/x11/c11-gate.h +++ b/algo/x11/c11-gate.h @@ -14,8 +14,8 @@ bool register_c11_algo( algo_gate_t* gate ); void c11_4way_hash( void *state, const void *input ); -int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_c11_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_c11_4way_ctx(); @@ -23,8 +23,8 @@ void init_c11_4way_ctx(); void c11_hash( void *state, const void *input ); -int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_c11( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_c11_ctx(); diff --git a/algo/x11/c11.c b/algo/x11/c11.c index 5f31e2aa..c51f567f 100644 --- a/algo/x11/c11.c +++ b/algo/x11/c11.c @@ -137,8 +137,8 @@ void c11_hash( void *output, const void *input ) memcpy(output, hash+64, 32); } -int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_c11( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash[8] __attribute__((aligned(64))); @@ -147,6 +147,7 @@ int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; 
volatile uint8_t *restart = &(work_restart[thr_id].restart); if (opt_benchmark) diff --git a/algo/x11/fresh.c b/algo/x11/fresh.c index 0601a05a..79491c64 100644 --- a/algo/x11/fresh.c +++ b/algo/x11/fresh.c @@ -44,12 +44,13 @@ extern void freshhash(void* output, const void* input, uint32_t len) memcpy(output, hash, 32); } -int scanhash_fresh(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_fresh( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t len = 80; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; diff --git a/algo/x11/timetravel-4way.c b/algo/x11/timetravel-4way.c index 7be9c8cc..155ad118 100644 --- a/algo/x11/timetravel-4way.c +++ b/algo/x11/timetravel-4way.c @@ -87,18 +87,18 @@ void timetravel_4way_hash(void *output, const void *input) blake512_4way( &ctx.blake, vhashA, dataLen ); blake512_4way_close( &ctx.blake, vhashB ); if ( i == 7 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 1: bmw512_4way( &ctx.bmw, vhashA, dataLen ); bmw512_4way_close( &ctx.bmw, vhashB ); if ( i == 7 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 2: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 ); @@ -112,46 +112,46 @@ void timetravel_4way_hash(void *output, const void *input) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 ); if ( i != 7 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 3: skein512_4way( &ctx.skein, 
vhashA, dataLen ); skein512_4way_close( &ctx.skein, vhashB ); if ( i == 7 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 4: jh512_4way( &ctx.jh, vhashA, dataLen ); jh512_4way_close( &ctx.jh, vhashB ); if ( i == 7 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 5: keccak512_4way( &ctx.keccak, vhashA, dataLen ); keccak512_4way_close( &ctx.keccak, vhashB ); if ( i == 7 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 6: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); - mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 ); luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen ); - mm256_deinterleave_2x128( hash0, hash1, vhashA, dataLen<<3 ); - mm256_interleave_2x128( vhashA, hash2, hash3, dataLen<<3 ); + mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen ); - mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 ); + mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 ); if ( i != 7 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 7: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, dataLen ); @@ -165,7 +165,7 @@ void timetravel_4way_hash(void *output, const void *input) cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, dataLen ); if ( i != 7 ) - 
mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; default: @@ -180,8 +180,8 @@ void timetravel_4way_hash(void *output, const void *input) memcpy( output+96, hash3, 32 ); } -int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -190,10 +190,9 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); int i; @@ -216,7 +215,7 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce, } uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); do { @@ -229,17 +228,17 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) + && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) ); + } while ( ( n < max_nonce ) && !(*restart) ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git 
a/algo/x11/timetravel-gate.h b/algo/x11/timetravel-gate.h index 7aa9b34d..e9c1ae0c 100644 --- a/algo/x11/timetravel-gate.h +++ b/algo/x11/timetravel-gate.h @@ -22,8 +22,8 @@ bool register_timetravel_algo( algo_gate_t* gate ); void timetravel_4way_hash( void *state, const void *input ); -int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_tt8_4way_ctx(); @@ -31,8 +31,8 @@ void init_tt8_4way_ctx(); void timetravel_hash( void *state, const void *input ); -int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_timetravel( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_tt8_ctx(); diff --git a/algo/x11/timetravel.c b/algo/x11/timetravel.c index 5161a56a..02ad5ec9 100644 --- a/algo/x11/timetravel.c +++ b/algo/x11/timetravel.c @@ -210,14 +210,14 @@ void timetravel_hash(void *output, const void *input) memcpy(output, &hash[16 * (TT8_FUNC_COUNT - 1)], 32); } -int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_timetravel( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[8]; uint32_t _ALIGN(64) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; diff --git a/algo/x11/timetravel10-4way.c b/algo/x11/timetravel10-4way.c index 783e1f6d..463c3a9d 100644 --- a/algo/x11/timetravel10-4way.c +++ b/algo/x11/timetravel10-4way.c @@ -93,18 +93,18 @@ void timetravel10_4way_hash(void *output, const void *input) blake512_4way( &ctx.blake, vhashA, dataLen ); blake512_4way_close( 
&ctx.blake, vhashB ); if ( i == 9 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 1: bmw512_4way( &ctx.bmw, vhashA, dataLen ); bmw512_4way_close( &ctx.bmw, vhashB ); if ( i == 9 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 2: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 ); @@ -118,46 +118,46 @@ void timetravel10_4way_hash(void *output, const void *input) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 ); if ( i != 9 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 3: skein512_4way( &ctx.skein, vhashA, dataLen ); skein512_4way_close( &ctx.skein, vhashB ); if ( i == 9 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 4: jh512_4way( &ctx.jh, vhashA, dataLen ); jh512_4way_close( &ctx.jh, vhashB ); if ( i == 9 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 5: keccak512_4way( &ctx.keccak, vhashA, dataLen ); keccak512_4way_close( &ctx.keccak, vhashB ); if ( i == 9 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); break; case 6: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); - mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 ); luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen ); - mm256_deinterleave_2x128( hash0, 
hash1, vhashA, dataLen<<3 ); - mm256_interleave_2x128( vhashA, hash2, hash3, dataLen<<3 ); + mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen ); - mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 ); + mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 ); if ( i != 9 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 7: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, dataLen ); @@ -171,11 +171,11 @@ void timetravel10_4way_hash(void *output, const void *input) cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, dataLen ); if ( i != 9 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 8: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); sph_shavite512( &ctx.shavite, hash0, dataLen ); sph_shavite512_close( &ctx.shavite, hash0 ); @@ -189,21 +189,21 @@ void timetravel10_4way_hash(void *output, const void *input) sph_shavite512( &ctx.shavite, hash3, dataLen ); sph_shavite512_close( &ctx.shavite, hash3 ); if ( i != 9 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; case 9: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 ); - mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 ); simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 ); - mm256_deinterleave_2x128( hash0, hash1, vhashA, dataLen<<3 ); - mm256_interleave_2x128( 
vhashA, hash2, hash3, dataLen<<3 ); + mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 ); + mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 ); - mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 ); + mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 ); if ( i != 9 ) - mm256_interleave_4x64( vhashB, + mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); break; default: @@ -218,8 +218,8 @@ void timetravel10_4way_hash(void *output, const void *input) memcpy( output+96, hash3, 32 ); } -int scanhash_timetravel10_4way( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ) +int scanhash_timetravel10_4way( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -228,9 +228,8 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; volatile uint8_t *restart = &(work_restart[thr_id].restart); int i; @@ -254,7 +253,7 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work, } uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); do { @@ -267,16 +266,16 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) + && !opt_benchmark ) { pdata[19] = n+i; - 
nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) ); + } while ( ( n < max_nonce ) && !(*restart) ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/timetravel10-gate.h b/algo/x11/timetravel10-gate.h index d2823a12..35328f58 100644 --- a/algo/x11/timetravel10-gate.h +++ b/algo/x11/timetravel10-gate.h @@ -21,8 +21,8 @@ bool register_timetravel10_algo( algo_gate_t* gate ); void timetravel10_4way_hash( void *state, const void *input ); -int scanhash_timetravel10_4way( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ); +int scanhash_timetravel10_4way( struct work *work, + uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_tt10_4way_ctx(); @@ -30,8 +30,8 @@ void init_tt10_4way_ctx(); void timetravel10_hash( void *state, const void *input ); -int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_timetravel10( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_tt10_ctx(); diff --git a/algo/x11/timetravel10.c b/algo/x11/timetravel10.c index 33fa1c7d..0fefba55 100644 --- a/algo/x11/timetravel10.c +++ b/algo/x11/timetravel10.c @@ -242,13 +242,14 @@ void timetravel10_hash(void *output, const void *input) memcpy(output, &hash[16 * (TT10_FUNC_COUNT - 1)], 32); } -int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_timetravel10( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[8]; uint32_t _ALIGN(64) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const 
uint32_t first_nonce = pdata[19]; diff --git a/algo/x11/tribus-4way.c b/algo/x11/tribus-4way.c index ce7380f1..41baa95e 100644 --- a/algo/x11/tribus-4way.c +++ b/algo/x11/tribus-4way.c @@ -37,7 +37,7 @@ void tribus_hash_4way(void *state, const void *input) keccak512_4way( &ctx_keccak, vhash, 64 ); keccak512_4way_close( &ctx_keccak, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // hash echo serially init_echo( &ctx_echo, 512 ); @@ -59,7 +59,8 @@ void tribus_hash_4way(void *state, const void *input) memcpy( state+96, hash3, 32 ); } -int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_tribus_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); @@ -69,9 +70,8 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; uint32_t n = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, @@ -94,7 +94,7 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint } uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); // precalc midstate // doing it one way then then interleaving would be faster but too @@ -119,21 +119,19 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint for ( int i = 0; i < 4; i++ ) if ( ( !( (hash+(i<<3))[7] & mask ) ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = 
n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( (num_found == 0) && ( n < max_nonce ) - && !work_restart[thr_id].restart); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart); break; } } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/tribus-gate.h b/algo/x11/tribus-gate.h index d3d03158..dca51b45 100644 --- a/algo/x11/tribus-gate.h +++ b/algo/x11/tribus-gate.h @@ -14,15 +14,15 @@ void tribus_hash_4way( void *state, const void *input ); -int scanhash_tribus_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_tribus_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #else void tribus_hash( void *state, const void *input ); -int scanhash_tribus( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_tribus( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); bool tribus_thread_init(); diff --git a/algo/x11/tribus.c b/algo/x11/tribus.c index e5845867..2d346fdf 100644 --- a/algo/x11/tribus.c +++ b/algo/x11/tribus.c @@ -60,7 +60,8 @@ void tribus_hash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_tribus( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; uint32_t _ALIGN(128) endiandata[20]; @@ -69,6 +70,7 @@ int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, uint64_t const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; uint32_t n = pdata[19] - 1; + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, diff --git a/algo/x11/x11-4way.c b/algo/x11/x11-4way.c index 47168034..6a863e3c 
100644 --- a/algo/x11/x11-4way.c +++ b/algo/x11/x11-4way.c @@ -69,7 +69,7 @@ void x11_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 3 Groestl update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -81,7 +81,7 @@ void x11_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // 4way - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); // 4 Skein skein512_4way( &ctx.skein, vhash, 64 ); @@ -95,16 +95,16 @@ void x11_4way_hash( void *state, const void *input ) keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_close( &ctx.keccak, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 7 Luffa parallel 2 way 128 bit - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); - mm256_interleave_2x128( vhashB, hash2, hash3, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhashB, hash2, hash3, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 ); // 8 Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -132,13 +132,13 @@ void x11_4way_hash( void *state, const void *input ) sph_shavite512_close( &ctx.shavite, hash3 ); // 10 Simd - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); - mm256_interleave_2x128( vhashB, hash2, hash3, 512 ); + mm256_intrlv_2x128( vhash, 
hash0, hash1, 512 ); + mm256_intrlv_2x128( vhashB, hash2, hash3, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 ); // 11 Echo update_final_echo( &ctx.echo, (BitSequence *)hash0, @@ -159,8 +159,8 @@ void x11_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x11_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -169,8 +169,7 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, @@ -182,7 +181,7 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); for (int m=0; m < 6; m++) if (Htarg <= htmax[m]) @@ -200,20 +199,18 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce, for ( int i = 0; i < 4; i++ ) if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; 
- nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/x11-gate.h b/algo/x11/x11-gate.h index 69106dbe..702dcaa0 100644 --- a/algo/x11/x11-gate.h +++ b/algo/x11/x11-gate.h @@ -14,8 +14,8 @@ bool register_x11_algo( algo_gate_t* gate ); void x11_4way_hash( void *state, const void *input ); -int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11_4way_ctx(); @@ -23,8 +23,8 @@ void init_x11_4way_ctx(); void x11_hash( void *state, const void *input ); -int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11_ctx(); diff --git a/algo/x11/x11.c b/algo/x11/x11.c index b8e1710c..fb641a39 100644 --- a/algo/x11/x11.c +++ b/algo/x11/x11.c @@ -133,8 +133,8 @@ void x11_hash( void *state, const void *input ) memcpy( state, hash+64, 32 ); } -int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x11( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash64[8] __attribute__((aligned(64))); @@ -142,6 +142,7 @@ int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; const uint32_t Htarg = ptarget[7]; uint64_t htmax[] 
= { 0, diff --git a/algo/x11/x11evo-4way.c b/algo/x11/x11evo-4way.c index 40be4f10..eebc28dd 100644 --- a/algo/x11/x11evo-4way.c +++ b/algo/x11/x11evo-4way.c @@ -87,18 +87,18 @@ void x11evo_4way_hash( void *state, const void *input ) case 0: blake512_4way( &ctx.blake, input, 80 ); blake512_4way_close( &ctx.blake, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); break; case 1: bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_close( &ctx.bmw, vhash ); if ( i >= len-1 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); break; case 2: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -112,46 +112,46 @@ void x11evo_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; case 3: skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_close( &ctx.skein, vhash ); if ( i >= len-1 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); break; case 4: jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_close( &ctx.jh, vhash ); if ( i >= len-1 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); break; case 5: keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_close( &ctx.keccak, vhash ); if ( i >= len-1 ) - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); break; case 6: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); - 
mm256_interleave_2x128( vhash, hash0, hash1, 64<<3 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 64<<3 ); - mm256_interleave_2x128( vhash, hash2, hash3, 64<<3 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 64<<3 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; case 7: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -165,11 +165,11 @@ void x11evo_4way_hash( void *state, const void *input ) cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; case 8: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); sph_shavite512( &ctx.shavite, hash0, 64 ); sph_shavite512_close( &ctx.shavite, hash0 ); @@ -186,25 +186,25 @@ void x11evo_4way_hash( void *state, const void *input ) sph_shavite512( &ctx.shavite, hash3, 64 ); sph_shavite512_close( &ctx.shavite, hash3 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; case 9: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); - mm256_interleave_2x128( vhash, hash0, hash1, 64<<3 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 ); - 
mm256_deinterleave_2x128( hash0, hash1, vhash, 64<<3 ); - mm256_interleave_2x128( vhash, hash2, hash3, 64<<3 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 64<<3 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; case 10: - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); update_final_echo( &ctx.echo, (BitSequence *)hash0, (const BitSequence *) hash0, 512 ); @@ -218,7 +218,7 @@ void x11evo_4way_hash( void *state, const void *input ) update_final_echo( &ctx.echo, (BitSequence *)hash3, (const BitSequence *) hash3, 512 ); if ( i < len-1 ) - mm256_interleave_4x64( vhash, + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); break; } @@ -232,8 +232,8 @@ void x11evo_4way_hash( void *state, const void *input ) //static const uint32_t diff1targ = 0x0000ffff; -int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -242,8 +242,7 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; @@ -270,7 +269,7 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce, } uint64_t *edata = 
(uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); do { @@ -284,18 +283,16 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce, for ( int i = 0; i < 4; i++ ) if ( ( ( (hash+(i<<3))[7] & hmask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/x11evo-gate.h b/algo/x11/x11evo-gate.h index 7be09d81..515f1b33 100644 --- a/algo/x11/x11evo-gate.h +++ b/algo/x11/x11evo-gate.h @@ -19,8 +19,8 @@ bool register_x11evo_algo( algo_gate_t* gate ); void x11evo_4way_hash( void *state, const void *input ); -int scanhash_x11evo_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11evo_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11evo_4way_ctx(); @@ -28,8 +28,8 @@ void init_x11evo_4way_ctx(); void x11evo_hash( void *state, const void *input ); -int scanhash_x11evo( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11evo( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11evo_ctx(); diff --git a/algo/x11/x11evo.c b/algo/x11/x11evo.c index fdfca158..d58f124c 100644 --- a/algo/x11/x11evo.c +++ b/algo/x11/x11evo.c @@ -157,8 +157,8 @@ void x11evo_hash( void *state, const void *input ) //static const uint32_t diff1targ = 0x0000ffff; -int scanhash_x11evo( int thr_id, struct 
work* work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x11evo( struct work* work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash64[8] __attribute__((aligned(64))); @@ -166,6 +166,7 @@ int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; swab32_array( endiandata, pdata, 20 ); diff --git a/algo/x11/x11gost-4way.c b/algo/x11/x11gost-4way.c index c5f56337..6d90db56 100644 --- a/algo/x11/x11gost-4way.c +++ b/algo/x11/x11gost-4way.c @@ -70,7 +70,7 @@ void x11gost_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); memcpy( &ctx.groestl, &x11gost_4way_ctx.groestl, @@ -84,7 +84,7 @@ void x11gost_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // 4way - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_close( &ctx.skein, vhash ); @@ -96,7 +96,7 @@ void x11gost_4way_hash( void *state, const void *input ) keccak512_4way_close( &ctx.keccak, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); sph_gost512( &ctx.gost, hash0, 64 ); sph_gost512_close( &ctx.gost, hash0 ); @@ -110,13 +110,13 @@ void x11gost_4way_hash( void *state, const void *input ) sph_gost512( &ctx.gost, hash3, 64 ); sph_gost512_close( &ctx.gost, hash3 ); - 
mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); memcpy( &ctx.cube, &x11gost_4way_ctx.cube, sizeof(cubehashParam) ); @@ -141,12 +141,12 @@ void x11gost_4way_hash( void *state, const void *input ) sph_shavite512( &ctx.shavite, hash3, 64 ); sph_shavite512_close( &ctx.shavite, hash3 ); - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); update_final_echo( &ctx.echo, (BitSequence *)hash0, (const BitSequence *) hash0, 512 ); @@ -166,8 +166,8 @@ void x11gost_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -176,8 +176,7 @@ int scanhash_x11gost_4way( int thr_id, 
struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, @@ -189,7 +188,7 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); for (int m=0; m < 6; m++) if (Htarg <= htmax[m]) @@ -207,20 +206,18 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce, for ( int i = 0; i < 4; i++ ) if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x11/x11gost-gate.h b/algo/x11/x11gost-gate.h index cd486269..e090104b 100644 --- a/algo/x11/x11gost-gate.h +++ b/algo/x11/x11gost-gate.h @@ -14,8 +14,8 @@ bool register_x11gost_algo( algo_gate_t* gate ); void x11gost_4way_hash( void *state, const void *input ); -int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11gost_4way_ctx(); @@ -23,8 +23,8 @@ void init_x11gost_4way_ctx(); void 
x11gost_hash( void *state, const void *input ); -int scanhash_x11gost( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x11gost( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x11gost_ctx(); diff --git a/algo/x11/x11gost.c b/algo/x11/x11gost.c index 7caff1d6..dd6964dc 100644 --- a/algo/x11/x11gost.c +++ b/algo/x11/x11gost.c @@ -135,14 +135,15 @@ void x11gost_hash(void *output, const void *input) memcpy(output, hashA, 32); } -int scanhash_x11gost( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_x11gost( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; uint32_t _ALIGN(64) endiandata[20]; + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t nonce = first_nonce; volatile uint8_t *restart = &(work_restart[thr_id].restart); diff --git a/algo/x12/x12-4way.c b/algo/x12/x12-4way.c index 1b1731cd..3a0cde78 100644 --- a/algo/x12/x12-4way.c +++ b/algo/x12/x12-4way.c @@ -74,7 +74,7 @@ void x12_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 3 Groestl update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -86,7 +86,7 @@ void x12_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // Parallel 4way 64 bit - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); // 4 Skein skein512_4way( &ctx.skein, vhash, 64 ); @@ -101,16 +101,16 @@ void x12_4way_hash( void *state, const void *input ) keccak512_4way_close( &ctx.keccak, vhash ); // Serial - 
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 7 Luffa - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // 8 Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -138,13 +138,13 @@ void x12_4way_hash( void *state, const void *input ) sph_shavite512_close( &ctx.shavite, hash3 ); // 10 Simd - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // 11 Echo update_final_echo( &ctx.echo, (BitSequence *)hash0, @@ -160,36 +160,15 @@ void x12_4way_hash( void *state, const void *input ) (const BitSequence *) hash3, 512 ); // 12 Hamsi parallel 4way 32 bit - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_close( &ctx.hamsi, vhash ); - mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 ); - 
- -/* - // 13 Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - - memcpy( state, hash0, 32 ); - memcpy( state+32, hash1, 32 ); - memcpy( state+64, hash2, 32 ); - memcpy( state+96, hash3, 32 ); -*/ + mm256_dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 ); } -int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x12_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -198,9 +177,8 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -211,7 +189,7 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] ) @@ -228,21 +206,19 @@ int scanhash_x12_4way( int thr_id, 
struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + if ( ( (hash+(i<<3))[7] & mask ) == 0 ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x12/x12-gate.h b/algo/x12/x12-gate.h index f6f59bd6..e26956e9 100644 --- a/algo/x12/x12-gate.h +++ b/algo/x12/x12-gate.h @@ -14,8 +14,8 @@ bool register_x12_algo( algo_gate_t* gate ); void x12_4way_hash( void *state, const void *input ); -int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x12_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x12_4way_ctx(); @@ -23,8 +23,8 @@ void init_x12_4way_ctx(); void x12hash( void *state, const void *input ); -int scanhash_x12( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x12( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x12_ctx(); diff --git a/algo/x12/x12.c b/algo/x12/x12.c index ce6f2a46..87a4fa69 100644 --- a/algo/x12/x12.c +++ b/algo/x12/x12.c @@ -164,8 +164,8 @@ void x12hash(void *output, const void *input) memcpy(output, hashB, 32); } -int scanhash_x12(int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_x12( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash64[8] 
__attribute__((aligned(64))); @@ -173,6 +173,7 @@ int scanhash_x12(int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { diff --git a/algo/x13/drop.c b/algo/x13/drop.c index b436301e..3990c3f0 100644 --- a/algo/x13/drop.c +++ b/algo/x13/drop.c @@ -175,7 +175,8 @@ static void droplp_hash_pok(void *output, uint32_t *pdata, const uint32_t versio memcpy(output, hash, 32); } -int scanhash_drop(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_drop( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[16]; uint32_t *pdata = work->data; @@ -183,6 +184,7 @@ int scanhash_drop(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *h const uint32_t version = pdata[0] & (~POK_DATA_MASK); const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated #define tmpdata pdata if (opt_benchmark) diff --git a/algo/x13/phi1612-4way.c b/algo/x13/phi1612-4way.c index aab523e3..5ec921f3 100644 --- a/algo/x13/phi1612-4way.c +++ b/algo/x13/phi1612-4way.c @@ -53,7 +53,7 @@ void phi1612_4way_hash( void *state, const void *input ) jh512_4way_close( &ctx.jh, vhash ); // Serial to the end - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -109,52 +109,41 @@ void phi1612_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info 
*mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t _ALIGN(64) endiandata[20]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; - uint32_t *noncep = vdata + 73; // 9*8 + 1 + __m256i *noncev = (__m256i*)vdata + 9; // aligned + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0cff; - - for ( int k = 0; k < 19; k++ ) - be32enc( &endiandata[k], pdata[k] ); - - uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_bswap_intrlv80_4x64( vdata, pdata ); do { - be32enc( noncep, n ); - be32enc( noncep+2, n+1 ); - be32enc( noncep+4, n+2 ); - be32enc( noncep+6, n+3 ); + *noncev = mm256_intrlv_blend_32( mm256_bswap_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); phi1612_4way_hash( hash, vdata ); pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); - + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x13/phi1612-gate.h b/algo/x13/phi1612-gate.h index 713ccadc..a1f65187 100644 --- a/algo/x13/phi1612-gate.h +++ b/algo/x13/phi1612-gate.h @@ -14,8 +14,8 @@ bool register_phi1612_algo( algo_gate_t* gate ); void phi1612_4way_hash( void *state, const void *input ); -int 
scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_phi1612_4way_ctx(); @@ -23,8 +23,8 @@ void init_phi1612_4way_ctx(); void phi1612_hash( void *state, const void *input ); -int scanhash_phi1612( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_phi1612( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_phi1612_ctx(); diff --git a/algo/x13/phi1612.c b/algo/x13/phi1612.c index fe8d4e92..1ea20325 100644 --- a/algo/x13/phi1612.c +++ b/algo/x13/phi1612.c @@ -89,8 +89,8 @@ void phi1612_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_phi1612( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_phi1612( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -98,6 +98,7 @@ int scanhash_phi1612( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t _ALIGN(64) endiandata[20]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; volatile uint8_t *restart = &(work_restart[thr_id].restart); if (opt_benchmark) diff --git a/algo/x13/skunk-4way.c b/algo/x13/skunk-4way.c index a639d091..cc1fe1ac 100644 --- a/algo/x13/skunk-4way.c +++ b/algo/x13/skunk-4way.c @@ -33,7 +33,7 @@ void skunk_4way_hash( void *output, const void *input ) skein512_4way( &ctx.skein, input, 80 ); skein512_4way_close( &ctx.skein, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 ); memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) ); @@ -73,8 +73,8 @@ 
void skunk_4way_hash( void *output, const void *input ) memcpy( output+96, hash3, 32 ); } -int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_skunk_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -83,10 +83,9 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); if ( opt_benchmark ) @@ -95,7 +94,7 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce, be32enc( &endiandata[k], pdata[k] ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); do { be32enc( noncep, n ); @@ -107,17 +106,17 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) + if ( (hash+(i<<3))[7] <= Htarg ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n +=4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) ); + } while ( ( n < max_nonce ) && !(*restart) ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } bool skunk_4way_thread_init() diff --git a/algo/x13/skunk-gate.h b/algo/x13/skunk-gate.h index d616bf1e..a389f2ed 
100644 --- a/algo/x13/skunk-gate.h +++ b/algo/x13/skunk-gate.h @@ -14,8 +14,8 @@ bool register_skunk_algo( algo_gate_t* gate ); void skunk_4way_hash( void *state, const void *input ); -int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_skunk_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); bool skunk_4way_thread_init(); //void init_skunk_4way_ctx(); @@ -24,8 +24,8 @@ bool skunk_4way_thread_init(); void skunkhash( void *state, const void *input ); -int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_skunk( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); bool skunk_thread_init(); diff --git a/algo/x13/skunk.c b/algo/x13/skunk.c index 2252aee3..80358e92 100644 --- a/algo/x13/skunk.c +++ b/algo/x13/skunk.c @@ -38,8 +38,8 @@ void skunkhash( void *output, const void *input ) memcpy(output, hash, 32); } -int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_skunk( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -47,6 +47,7 @@ int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t _ALIGN(64) endiandata[20]; uint32_t nonce = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); if ( opt_benchmark ) diff --git a/algo/x13/x13-4way.c b/algo/x13/x13-4way.c index 3b1e1f2b..1bfc7bf3 100644 --- a/algo/x13/x13-4way.c +++ b/algo/x13/x13-4way.c @@ -74,7 +74,7 @@ void x13_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, 
hash3, vhash, 512 ); // 3 Groestl update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -86,7 +86,7 @@ void x13_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // Parallel 4way 64 bit - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); // 4 Skein skein512_4way( &ctx.skein, vhash, 64 ); @@ -101,16 +101,16 @@ void x13_4way_hash( void *state, const void *input ) keccak512_4way_close( &ctx.keccak, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 7 Luffa - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // 8 Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -138,13 +138,13 @@ void x13_4way_hash( void *state, const void *input ) sph_shavite512_close( &ctx.shavite, hash3 ); // 10 Simd - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - 
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // 11 Echo update_final_echo( &ctx.echo, (BitSequence *)hash0, @@ -160,10 +160,10 @@ void x13_4way_hash( void *state, const void *input ) (const BitSequence *) hash3, 512 ); // 12 Hamsi parallel 4way 32 bit - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_close( &ctx.hamsi, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 13 Fugue serial sph_fugue512( &ctx.fugue, hash0, 64 ); @@ -184,8 +184,8 @@ void x13_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x13_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -194,9 +194,8 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -207,7 +206,7 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] ) @@ -224,21 +223,19 @@ int 
scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x13/x13-gate.h b/algo/x13/x13-gate.h index 718810e9..c61d7d6b 100644 --- a/algo/x13/x13-gate.h +++ b/algo/x13/x13-gate.h @@ -14,8 +14,8 @@ bool register_x13_algo( algo_gate_t* gate ); void x13_4way_hash( void *state, const void *input ); -int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x13_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x13_4way_ctx(); @@ -23,8 +23,8 @@ void init_x13_4way_ctx(); void x13hash( void *state, const void *input ); -int scanhash_x13( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x13( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x13_ctx(); diff --git a/algo/x13/x13.c b/algo/x13/x13.c index 2f475db0..a55cb9a4 100644 --- a/algo/x13/x13.c +++ b/algo/x13/x13.c @@ -174,8 +174,8 @@ void x13hash(void *output, const void *input) memcpy(output, hashB, 32); } -int scanhash_x13(int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done) +int scanhash_x13( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); 
uint32_t hash64[8] __attribute__((aligned(64))); @@ -183,6 +183,7 @@ int scanhash_x13(int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { diff --git a/algo/x13/x13sm3-4way.c b/algo/x13/x13sm3-4way.c index a297c756..41a909fd 100644 --- a/algo/x13/x13sm3-4way.c +++ b/algo/x13/x13sm3-4way.c @@ -81,7 +81,7 @@ void x13sm3_4way_hash( void *state, const void *input ) bmw512_4way_close( &ctx.bmw, vhash ); // Serial - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // Groestl update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); @@ -93,7 +93,7 @@ void x13sm3_4way_hash( void *state, const void *input ) update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); // Parallel 4way - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); // Skein skein512_4way( &ctx.skein, vhash, 64 ); @@ -108,16 +108,16 @@ void x13sm3_4way_hash( void *state, const void *input ) keccak512_4way_close( &ctx.keccak, vhash ); // Serial to the end - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // Luffa - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); luffa_2way_init( &ctx.luffa, 512 ); luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + 
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // Cubehash cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 ); @@ -145,13 +145,13 @@ void x13sm3_4way_hash( void *state, const void *input ) sph_shavite512_close( &ctx.shavite, hash3 ); // Simd - mm256_interleave_2x128( vhash, hash0, hash1, 512 ); + mm256_intrlv_2x128( vhash, hash0, hash1, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash0, hash1, vhash, 512 ); - mm256_interleave_2x128( vhash, hash2, hash3, 512 ); + mm256_dintrlv_2x128( hash0, hash1, vhash, 512 ); + mm256_intrlv_2x128( vhash, hash2, hash3, 512 ); simd_2way_init( &ctx.simd, 512 ); simd_2way_update_close( &ctx.simd, vhash, vhash, 512 ); - mm256_deinterleave_2x128( hash2, hash3, vhash, 512 ); + mm256_dintrlv_2x128( hash2, hash3, vhash, 512 ); // Echo update_final_echo( &ctx.echo, (BitSequence *)hash0, @@ -166,7 +166,7 @@ void x13sm3_4way_hash( void *state, const void *input ) update_final_echo( &ctx.echo, (BitSequence *)hash3, (const BitSequence *) hash3, 512 ); - mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); + mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); // SM3 parallel 32 bit uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64))); @@ -182,13 +182,13 @@ void x13sm3_4way_hash( void *state, const void *input ) sm3_4way( &ctx.sm3, vhash, 64 ); sm3_4way_close( &ctx.sm3, sm3_vhash ); - mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 ); + mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 ); // Hamsi parallel 4x32x2 - mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); + mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_close( &ctx.hamsi, vhash ); - mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); + mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // Fugue serial sph_fugue512( &ctx.fugue, hash0, 64 ); @@ -209,8 
+209,8 @@ void x13sm3_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ) +int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); @@ -219,9 +219,8 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -232,7 +231,7 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce, swab32_array( endiandata, pdata, 20 ); uint64_t *edata = (uint64_t*)endiandata; - mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); + mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); blake512_4way_init( &x13sm3_ctx_mid ); blake512_4way( &x13sm3_ctx_mid, vdata, 64 ); @@ -252,21 +251,19 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; 
+ return 0; } #endif diff --git a/algo/x13/x13sm3-gate.h b/algo/x13/x13sm3-gate.h index 5399a41b..69210d39 100644 --- a/algo/x13/x13sm3-gate.h +++ b/algo/x13/x13sm3-gate.h @@ -14,8 +14,8 @@ bool register_x13sm3_algo( algo_gate_t* gate ); void x13sm3_4way_hash( void *state, const void *input ); -int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x13sm3_4way_ctx(); @@ -23,8 +23,8 @@ void init_x13sm3_4way_ctx(); void x13sm3_hash( void *state, const void *input ); -int scanhash_x13sm3( int thr_id, struct work *work, uint32_t max_nonce, - uint64_t *hashes_done ); +int scanhash_x13sm3( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); void init_x13sm3_ctx(); diff --git a/algo/x13/x13sm3.c b/algo/x13/x13sm3.c index 8b50f9c4..8c495d0f 100644 --- a/algo/x13/x13sm3.c +++ b/algo/x13/x13sm3.c @@ -171,8 +171,8 @@ void x13sm3_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_x13sm3( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) +int scanhash_x13sm3( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr) { uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t hash64[8] __attribute__((aligned(64))); @@ -180,6 +180,7 @@ int scanhash_x13sm3( int thr_id, struct work *work, uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = { diff --git a/algo/x14/axiom.c b/algo/x14/axiom.c index b5d2ee7b..7c0b70a9 100644 --- a/algo/x14/axiom.c +++ b/algo/x14/axiom.c @@ -43,14 +43,14 @@ void axiomhash(void *output, const void *input) memcpy(output, M[N-1], 32); } -int scanhash_axiom(int thr_id, struct work *work, +int 
scanhash_axiom( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t _ALIGN(64) hash64[8]; uint32_t _ALIGN(64) endiandata[20]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; diff --git a/algo/x14/polytimos-4way.c b/algo/x14/polytimos-4way.c index afb2972b..32b2ca61 100644 --- a/algo/x14/polytimos-4way.c +++ b/algo/x14/polytimos-4way.c @@ -100,7 +100,7 @@ void polytimos_4way_hash( void *output, const void *input ) memcpy( output+96, hash3, 32 ); } -int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -111,7 +111,7 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 9; // aligned const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); if ( opt_benchmark ) @@ -129,7 +129,7 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; diff --git a/algo/x14/polytimos-gate.h b/algo/x14/polytimos-gate.h index 85fbc646..080d9dc3 100644 --- a/algo/x14/polytimos-gate.h +++ b/algo/x14/polytimos-gate.h @@ -13,13 +13,13 @@ bool register_polytimos_algo( algo_gate_t* gate ); #if defined(POLYTIMOS_4WAY) void polytimos_4way_hash( void *state, const void *input ); -int 
scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif void polytimos_hash( void *state, const void *input ); -int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_polytimos( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_polytimos_ctx(); diff --git a/algo/x14/polytimos.c b/algo/x14/polytimos.c index ab834998..b5a3de7e 100644 --- a/algo/x14/polytimos.c +++ b/algo/x14/polytimos.c @@ -76,14 +76,14 @@ void polytimos_hash(void *output, const void *input) memcpy(output, hashA, 32); } -int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_polytimos( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash[8]; uint32_t _ALIGN(128) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; diff --git a/algo/x14/veltor-4way.c b/algo/x14/veltor-4way.c index a44c2d39..ff813337 100644 --- a/algo/x14/veltor-4way.c +++ b/algo/x14/veltor-4way.c @@ -77,7 +77,7 @@ void veltor_4way_hash( void *output, const void *input ) memcpy( output+96, hash3, 32 ); } -int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_veltor_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -88,10 +88,8 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - uint32_t *nonces = work->nonces; - int 
num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated volatile uint8_t *restart = &(work_restart[thr_id].restart); if ( opt_benchmark ) @@ -117,13 +115,12 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) ); + } while ( ( n < max_nonce ) && !(*restart) ); *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x14/veltor-gate.h b/algo/x14/veltor-gate.h index d10781f8..7f97663b 100644 --- a/algo/x14/veltor-gate.h +++ b/algo/x14/veltor-gate.h @@ -14,7 +14,7 @@ bool register_veltor_algo( algo_gate_t* gate ); void veltor_4way_hash( void *state, const void *input ); -int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_veltor_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_veltor_4way_ctx(); @@ -23,7 +23,7 @@ void init_veltor_4way_ctx(); void veltor_hash( void *state, const void *input ); -int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_veltor( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_veltor_ctx(); diff --git a/algo/x14/veltor.c b/algo/x14/veltor.c index c5fa3d60..8e95af47 100644 --- a/algo/x14/veltor.c +++ b/algo/x14/veltor.c @@ -61,14 +61,14 @@ void veltor_hash(void *output, const void *input) memcpy(output, hashB, 32); } -int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_veltor( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { 
uint32_t _ALIGN(128) hash[8]; uint32_t _ALIGN(128) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; diff --git a/algo/x14/x14-4way.c b/algo/x14/x14-4way.c index 461ddd0e..fde55f9d 100644 --- a/algo/x14/x14-4way.c +++ b/algo/x14/x14-4way.c @@ -189,7 +189,7 @@ void x14_4way_hash( void *state, const void *input ) } -int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x14_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*16] __attribute__ ((aligned (64))); @@ -199,11 +199,9 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, @@ -238,21 +236,18 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t lane_hash[8]; mm128_extract_lane_4x32( lane_hash, hash, lane, 256 ); - if ( fulltest( lane_hash, ptarget ) ) + if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - nonces[ num_found++ ] = n + lane; - work_set_target_ratio( work, lane_hash ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } - *hashes_done = n - first_nonce + 1; - return 
num_found; + return 0; } #endif diff --git a/algo/x14/x14-gate.h b/algo/x14/x14-gate.h index 18b00160..9df974fc 100644 --- a/algo/x14/x14-gate.h +++ b/algo/x14/x14-gate.h @@ -13,14 +13,14 @@ bool register_x14_algo( algo_gate_t* gate ); #if defined(X14_4WAY) void x14_4way_hash( void *state, const void *input ); -int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x14_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_x14_4way_ctx(); #endif void x14hash( void *state, const void *input ); -int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x14( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_x14_ctx(); diff --git a/algo/x14/x14.c b/algo/x14/x14.c index effd8fdb..771805c1 100644 --- a/algo/x14/x14.c +++ b/algo/x14/x14.c @@ -180,7 +180,7 @@ void x14hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x14( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -190,7 +190,7 @@ int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, diff --git a/algo/x15/x15-4way.c b/algo/x15/x15-4way.c index 5635acf1..441eaada 100644 --- a/algo/x15/x15-4way.c +++ b/algo/x15/x15-4way.c @@ -213,7 +213,7 @@ void x15_4way_hash( void *state, const void *input ) memcpy( state+96, hash3, 32 ); } -int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x15_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t 
hash[4*8] __attribute__ ((aligned (64))); @@ -223,11 +223,9 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; - uint32_t *nonces = work->nonces; - int num_found = 0; uint32_t *noncep = vdata + 73; // 9*8 + 1 const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, @@ -254,21 +252,19 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = n; for ( int i = 0; i < 4; i++ ) - if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) - && fulltest( hash+(i<<3), ptarget ) ) + if ( ( (hash+(i<<3))[7] & mask ) == 0 ) + if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - nonces[ num_found++ ] = n+i; - work_set_target_ratio( work, hash+(i<<3) ); + submit_lane_solution( work, hash, mythr, i ); } n += 4; - } while ( ( num_found == 0 ) && ( n < max_nonce ) - && !work_restart[thr_id].restart ); + } while ( ( n < max_nonce ) && !work_restart[thr_id].restart ); break; } *hashes_done = n - first_nonce + 1; - return num_found; + return 0; } #endif diff --git a/algo/x15/x15-gate.h b/algo/x15/x15-gate.h index eefccf67..8224fe22 100644 --- a/algo/x15/x15-gate.h +++ b/algo/x15/x15-gate.h @@ -13,14 +13,14 @@ bool register_x15_algo( algo_gate_t* gate ); #if defined(X15_4WAY) void x15_4way_hash( void *state, const void *input ); -int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x15_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_x15_4way_ctx(); #endif void x15hash( void *state, const void *input ); -int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x15( struct work *work, uint32_t 
max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_x15_ctx(); diff --git a/algo/x15/x15.c b/algo/x15/x15.c index b3a71801..29baafe9 100644 --- a/algo/x15/x15.c +++ b/algo/x15/x15.c @@ -186,7 +186,7 @@ void x15hash(void *output, const void *input) memcpy(output, hashB, 32); } -int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x15( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -196,7 +196,7 @@ int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, @@ -246,9 +246,7 @@ int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce, if (!(hash64[7] & mask)) { printf("[%d]",thr_id); if (fulltest(hash64, ptarget)) { - work_set_target_ratio( work, hash64 ); - *hashes_done = n - first_nonce + 1; - return true; + submit_solution( work, hash64, mythr ); } } #endif diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c index b254f52d..d31e5694 100644 --- a/algo/x16/x16r-4way.c +++ b/algo/x16/x16r-4way.c @@ -284,7 +284,7 @@ void x16r_4way_hash( void* output, const void* input ) memcpy( output+96, hash3, 32 ); } -int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[4*16] __attribute__ ((aligned (64))); @@ -295,7 +295,7 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated __m256i *noncev = 
(__m256i*)vdata + 9; // aligned volatile uint8_t *restart = &(work_restart[thr_id].restart); @@ -330,7 +330,7 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce, if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) { pdata[19] = n+i; - submit_solution( work, hash+(i<<3), mythr, i ); + submit_lane_solution( work, hash+(i<<3), mythr, i ); } n += 4; } while ( ( n < max_nonce ) && !(*restart) ); diff --git a/algo/x16/x16r-gate.h b/algo/x16/x16r-gate.h index 88ff56a0..df5d5a99 100644 --- a/algo/x16/x16r-gate.h +++ b/algo/x16/x16r-gate.h @@ -39,13 +39,13 @@ bool register_x16s_algo( algo_gate_t* gate ); #if defined(X16R_4WAY) void x16r_4way_hash( void *state, const void *input ); -int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif void x16r_hash( void *state, const void *input ); -int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x16r( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/x16/x16r.c b/algo/x16/x16r.c index 4f8c077e..2ed393d5 100644 --- a/algo/x16/x16r.c +++ b/algo/x16/x16r.c @@ -178,7 +178,7 @@ void x16r_hash( void* output, const void* input ) memcpy(output, hash, 32); } -int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x16r( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; @@ -187,7 +187,7 @@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce, uint32_t *ptarget = work->target; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint32_t nonce = first_nonce; volatile uint8_t *restart = &(work_restart[thr_id].restart); @@ -214,17 +214,14 
@@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce, be32enc( &endiandata[19], nonce ); x16r_hash( hash32, endiandata ); - if ( hash32[7] <= Htarg && fulltest( hash32, ptarget ) ) + if ( hash32[7] <= Htarg ) + if (fulltest( hash32, ptarget ) && !opt_benchmark ) { - work_set_target_ratio( work, hash32 ); pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 1; + submit_solution( work, hash32, mythr ); } nonce++; - } while ( nonce < max_nonce && !(*restart) ); - pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c index c670c983..8f6a8c35 100644 --- a/algo/x17/sonoa-4way.c +++ b/algo/x17/sonoa-4way.c @@ -803,7 +803,7 @@ void sonoa_4way_hash( void *state, const void *input ) haval256_5_4way_close( &ctx.haval, state ); } -int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -816,7 +816,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, @@ -841,7 +841,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/x17/sonoa-gate.h b/algo/x17/sonoa-gate.h index 05f03ffa..c97a3750 100644 --- a/algo/x17/sonoa-gate.h +++ b/algo/x17/sonoa-gate.h @@ -14,7 +14,7 
@@ bool register_sonoa_algo( algo_gate_t* gate ); void sonoa_4way_hash( void *state, const void *input ); -int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); //void init_sonoa_4way_ctx(); @@ -23,7 +23,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce, void sonoa_hash( void *state, const void *input ); -int scanhash_sonoa( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sonoa( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_sonoa_ctx(); diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c index 5ace9274..ce1c0fc6 100644 --- a/algo/x17/sonoa.c +++ b/algo/x17/sonoa.c @@ -564,17 +564,17 @@ void sonoa_hash( void *state, const void *input ) memcpy(state, hash, 32); } -int scanhash_sonoa( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_sonoa( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { -uint32_t _ALIGN(128) hash32[8]; -uint32_t _ALIGN(128) endiandata[20]; -uint32_t *pdata = work->data; -uint32_t *ptarget = work->target; -const uint32_t first_nonce = pdata[19]; -const uint32_t Htarg = ptarget[7]; -uint32_t n = pdata[19] - 1; -/* int */ thr_id = mythr->id; // thr_id arg is deprecated + uint32_t _ALIGN(128) hash32[8]; + uint32_t _ALIGN(128) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + uint32_t n = pdata[19] - 1; + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { @@ -603,45 +603,20 @@ uint32_t n = pdata[19] - 1; casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); -#ifdef DEBUG_ALGO - printf("[%d] Htarg=%X\n", thr_id, Htarg); -#endif - for ( int m = 0; 
m < 6; m++ ) + for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) { - if ( Htarg <= htmax[m] ) - { - uint32_t mask = masks[m]; - do - { - pdata[19] = ++n; - be32enc(&endiandata[19], n); - sonoa_hash(hash32, endiandata); -#ifndef DEBUG_ALGO - if ( ( !( hash32[7] & mask ) ) && fulltest( hash32, ptarget ) ) - { - work_set_target_ratio( work, hash32 ); - *hashes_done = n - first_nonce + 1; - return 1; - } -#else - if (!(n % 0x1000) && !thr_id) printf("."); - if ( !(hash32[7] & mask) ) - { - printf("[%d]",thr_id); - if ( fulltest( hash32, ptarget ) ) - { - work_set_target_ratio( work, hash32 ); - *hashes_done = n - first_nonce + 1; - return 1; - } - } -#endif + uint32_t mask = masks[m]; + do + { + pdata[19] = ++n; + be32enc(&endiandata[19], n); + sonoa_hash(hash32, endiandata); + if ( !( hash32[7] & mask ) ) + if ( fulltest( hash32, ptarget ) && !opt_benchmark ) + submit_solution( work, hash32, mythr ); } while (n < max_nonce && !work_restart[thr_id].restart); - // see blake.c if else to understand the loop on htmax => mask - break; + break; } - } - *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 5bbf49c8..719e2300 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -202,7 +202,7 @@ void x17_4way_hash( void *state, const void *input ) haval256_5_4way_close( &ctx.haval, state ); } -int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x17_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -214,7 +214,7 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19]; const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; uint64_t htmax[] = 
{ 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 }; @@ -239,7 +239,7 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/x17/x17-gate.h b/algo/x17/x17-gate.h index a0b9b81c..9a40b349 100644 --- a/algo/x17/x17-gate.h +++ b/algo/x17/x17-gate.h @@ -13,13 +13,13 @@ bool register_x17_algo( algo_gate_t* gate ); #if defined(X17_4WAY) void x17_4way_hash( void *state, const void *input ); -int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x17_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif void x17_hash( void *state, const void *input ); -int scanhash_x17( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x17( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif diff --git a/algo/x17/x17.c b/algo/x17/x17.c index 228cb696..c4ddc760 100644 --- a/algo/x17/x17.c +++ b/algo/x17/x17.c @@ -181,7 +181,7 @@ void x17_hash(void *output, const void *input) sph_haval256_5_close( &ctx.haval, output ); } -int scanhash_x17( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_x17( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t endiandata[20] __attribute__((aligned(64))); @@ -191,7 +191,7 @@ int scanhash_x17( int thr_id, struct work *work, uint32_t max_nonce, uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; const uint32_t Htarg = ptarget[7]; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated uint64_t htmax[] = { @@ -219,10 +219,6 @@ int scanhash_x17( int thr_id, struct work *work, uint32_t max_nonce, casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); 
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); -#ifdef DEBUG_ALGO - if ( Htarg != 0 ) - printf( "[%d] Htarg=%X\n", thr_id, Htarg ); -#endif for ( int m = 0; m < 6; m++ ) { if ( Htarg <= htmax[m] ) @@ -233,32 +229,10 @@ int scanhash_x17( int thr_id, struct work *work, uint32_t max_nonce, pdata[19] = ++n; be32enc( &endiandata[19], n ); x17_hash( hash64, endiandata ); -#ifndef DEBUG_ALGO - if ( !( hash64[7] & mask ) ) - { - if ( fulltest( hash64, ptarget ) ) - { - *hashes_done = n - first_nonce + 1; - return true; - } -// else -// applog(LOG_INFO, "Result does not validate on CPU!"); - } -#else - if ( !( n % 0x1000 ) && !thr_id ) printf("."); if ( !( hash64[7] & mask ) ) - { - printf("[%d]",thr_id); - if ( fulltest( hash64, ptarget ) ) - { - work_set_target_ratio( work, hash64 ); - *hashes_done = n - first_nonce + 1; - return true; - } - } -#endif - } while (n < max_nonce && !work_restart[thr_id].restart); - // see blake.c if else to understand the loop on htmax => mask + if ( fulltest( hash64, ptarget ) && !opt_benchmark ) + submit_solution( work, hash64, mythr ); + } while ( n < max_nonce && !work_restart[thr_id].restart); break; } } diff --git a/algo/x17/xevan-4way.c b/algo/x17/xevan-4way.c index 8045e367..f406a8b2 100644 --- a/algo/x17/xevan-4way.c +++ b/algo/x17/xevan-4way.c @@ -329,7 +329,7 @@ void xevan_4way_hash( void *output, const void *input ) haval256_5_4way_close( &ctx.haval, output ); } -int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_xevan_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t hash[4*8] __attribute__ ((aligned (64))); @@ -338,7 +338,7 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce, uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id 
arg is deprecated __m256i *noncev = (__m256i*)vdata + 9; // aligned const uint32_t Htarg = ptarget[7]; @@ -361,7 +361,7 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce, if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) { pdata[19] = n + lane; - submit_solution( work, lane_hash, mythr, lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); } } n += 4; diff --git a/algo/x17/xevan-gate.h b/algo/x17/xevan-gate.h index f4bc38f6..c614c0bc 100644 --- a/algo/x17/xevan-gate.h +++ b/algo/x17/xevan-gate.h @@ -14,7 +14,7 @@ bool register_xevan_algo( algo_gate_t* gate ); void xevan_4way_hash( void *state, const void *input ); -int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_xevan_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); //void init_xevan_4way_ctx(); @@ -23,7 +23,7 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce, void xevan_hash( void *state, const void *input ); -int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_xevan( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); void init_xevan_ctx(); diff --git a/algo/x17/xevan.c b/algo/x17/xevan.c index 53ac3d99..b351eb35 100644 --- a/algo/x17/xevan.c +++ b/algo/x17/xevan.c @@ -230,14 +230,14 @@ void xevan_hash(void *output, const void *input) memcpy(output, hash, 32); } -int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_xevan( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) hash[8]; uint32_t _ALIGN(64) endiandata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t nonce = first_nonce; @@ 
-254,15 +254,14 @@ int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce, be32enc(&endiandata[19], nonce); xevan_hash(hash, endiandata); - if (hash[7] <= Htarg && fulltest(hash, ptarget)) { - work_set_target_ratio(work, hash); - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 1; + if (hash[7] <= Htarg ) + if ( fulltest( hash, ptarget ) && !opt_benchmark ) + { + pdata[19] = nonce; + submit_solution( work, hash, mythr ); } nonce++; - - } while (nonce < max_nonce && !(*restart)); + } while ( nonce < max_nonce && !(*restart) ); pdata[19] = nonce; *hashes_done = pdata[19] - first_nonce + 1; diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c index 4ec45364..441442e3 100644 --- a/algo/yescrypt/yescrypt.c +++ b/algo/yescrypt/yescrypt.c @@ -382,7 +382,7 @@ void yescrypthash(void *output, const void *input) yescrypt_hash((char*) input, (char*) output, 80); } -int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_yescrypt( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) vhash[8]; @@ -393,7 +393,7 @@ int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated for (int k = 0; k < 19; k++) be32enc(&endiandata[k], pdata[k]); diff --git a/algo/yespower/yespower.c b/algo/yespower/yespower.c index 8b700a46..b532b661 100644 --- a/algo/yespower/yespower.c +++ b/algo/yespower/yespower.c @@ -37,7 +37,7 @@ void yespower_hash( const char *input, char *output, uint32_t len ) yespower_tls( input, len, &yespower_params, (yespower_binary_t*)output ); } -int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce, +int scanhash_yespower( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct 
thr_info *mythr ) { uint32_t _ALIGN(64) vhash[8]; @@ -48,11 +48,10 @@ int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - /* int */ thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; // thr_id arg is deprecated for (int k = 0; k < 19; k++) be32enc(&endiandata[k], pdata[k]); - do { be32enc(&endiandata[19], n); yespower_hash((char*) endiandata, (char*) vhash, 80); @@ -119,7 +118,7 @@ int64_t yescryptr16_05_get_max64() bool register_yescrypt_05_algo( algo_gate_t* gate ) { - gate->optimizations = SSE2_OPT; + gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&yescrypt_05_get_max64; @@ -133,7 +132,7 @@ bool register_yescrypt_05_algo( algo_gate_t* gate ) bool register_yescryptr8_05_algo( algo_gate_t* gate ) { - gate->optimizations = SSE2_OPT; + gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&yescrypt_05_get_max64; @@ -147,7 +146,7 @@ bool register_yescryptr8_05_algo( algo_gate_t* gate ) bool register_yescryptr16_05_algo( algo_gate_t* gate ) { - gate->optimizations = SSE2_OPT; + gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&yescryptr16_05_get_max64; @@ -161,7 +160,7 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate ) bool register_yescryptr32_05_algo( algo_gate_t* gate ) { - gate->optimizations = SSE2_OPT; + gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&yescryptr16_05_get_max64; diff --git a/configure b/configure index 6430e3bf..8dac7a0e 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 
@@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.4. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.5. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.9.4' -PACKAGE_STRING='cpuminer-opt 3.9.4' +PACKAGE_VERSION='3.9.5' +PACKAGE_STRING='cpuminer-opt 3.9.5' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.9.4 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.9.5 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.9.4:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.9.5:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.9.4 +cpuminer-opt configure 3.9.5 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.9.4, which was +It was created by cpuminer-opt $as_me 3.9.5, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. 
PACKAGE='cpuminer-opt' - VERSION='3.9.4' + VERSION='3.9.5' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.9.4, which was +This file was extended by cpuminer-opt $as_me 3.9.5, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.9.4 +cpuminer-opt config.status 3.9.5 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 9707a8de..7d0fab4d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.9.4]) +AC_INIT([cpuminer-opt], [3.9.5]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 1fe5c6ce..56c100d8 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -142,6 +142,7 @@ char *rpc2_job_id = NULL; double opt_diff_factor = 1.0; uint32_t zr5_pok = 0; bool opt_stratum_stats = false; +bool opt_hash_meter = false; uint32_t accepted_share_count = 0ULL; uint32_t rejected_share_count = 0ULL; @@ -809,156 +810,157 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) void scale_hash_for_display ( double* hashrate, char* units ) { - if ( *hashrate < 1e4 ) - // 0 H/s to 9999 H/s - *units = 0; - else if ( *hashrate < 1e7 ) - { - // 10 kH/s to 9999 kH/s - *units = 'k'; - *hashrate /= 1e3; - } - else if ( *hashrate < 1e10 ) - { - // 10 Mh/s to 9999 Mh/s - *units = 'M'; - *hashrate /= 1e6; - } - else if ( *hashrate < 1e13 ) - { - // 10 iGh/s to 9999 Gh/s - *units = 'G'; - *hashrate /= 1e9; - } - else - { - // 10 Th/s and higher - *units = 'T'; - *hashrate /= 1e12; - } -} + if ( 
*hashrate < 1e4 ) // 0 H/s to 9999 H/s + *units = 0; + else if ( *hashrate < 1e7 ) // 10 kH/s to 9999 kH/s + { *units = 'k'; *hashrate /= 1e3; } + else if ( *hashrate < 1e10 ) // 10 Mh/s to 9999 Mh/s + { *units = 'M'; *hashrate /= 1e6; } + else if ( *hashrate < 1e13 ) // 10 Gh/s to 9999 Gh/s + { *units = 'G'; *hashrate /= 1e9; } + else if ( *hashrate < 1e16 ) // 10 Th/s to 9999 Th/s + { *units = 'T'; *hashrate /= 1e12; } + else // 10 Ph/s and higher + { *units = 'P'; *hashrate /= 1e15; } +} + +// Bitcoin formula for converting a share's difficulty to an equivalent +// number of hashes. +// +// https://en.bitcoin.it/wiki/Difficulty +// +// H = D * 2**48 / 0xffff +// = D * 2**32 +// +// That formula doesn't seem to be accurate but an adjustment to the +// constant produces correct results. +// +// The formula used is: +// +// hash = sharediff * 2**48 / 0x3fff +// = sharediff * 2**30 +// = sharediff * diff2hash + +const uint64_t diff2hash = 0x40000000ULL; + +static struct timeval submit_time, prev_submit_time; +static struct timeval submit_interval; +static struct timeval five_min_start; +static double shash_sum = 0.; +static double bhash_sum = 0.; +static double time_sum = 0.; +static double latency_sum = 0.; +static uint64_t submits_sum = 0; static int share_result( int result, struct work *work, const char *reason ) { - char hc[16]; char hr[16]; const char *sres; double hashcount = 0.; double hashrate = 0.; - char hc_units[4] = {0}; char hr_units[4] = {0}; - uint32_t total_submits; - float rate; - char rate_s[8] = {0}; + bool solved; + char shr[16]; + char shr_units[4] = {0}; + char diffstr[32]; + struct timeval ack_time, latency_tv; + uint64_t latency; + double share_time, share_hash, block_hash; double sharediff = work ? 
work->sharediff : stratum.sharediff; - bool solved = result && accepted_share_count && (net_diff > 0.0 ) - && ( sharediff >= net_diff ); - char sol[32] = {0}; - int i; + double share_size; - pthread_mutex_lock(&stats_lock); - for (i = 0; i < opt_n_threads; i++) + pthread_mutex_lock( &stats_lock ); + for ( int i = 0; i < opt_n_threads; i++ ) { hashcount += thr_hashcount[i]; hashrate += thr_hashrates[i]; } - solved = result && ( (uint64_t)hashcount > 0 ) && (net_diff > 0.0 ) - && ( sharediff >= net_diff ); - result ? accepted_share_count++ : rejected_share_count++; - if ( solved ) - { - solved_block_count++; - if ( use_colors ) - sprintf( sol, CL_GRN " Solved: %d" CL_WHT, solved_block_count ); - else - sprintf( sol, ", Solved: %d", solved_block_count ); - } + // calculate latency + gettimeofday( &ack_time, NULL ); + timeval_subtract( &latency_tv, &ack_time, &submit_time ); + latency = ( latency_tv.tv_sec * 1000 + latency_tv.tv_usec / 1000 ); - pthread_mutex_unlock(&stats_lock); - global_hashcount = hashcount; - global_hashrate = hashrate; - total_submits = accepted_share_count + rejected_share_count; + // calculate share hashrate and size + share_time = submit_interval.tv_sec + ( submit_interval.tv_usec / 1000000. ); + share_hash = sharediff * diff2hash; + block_hash = net_diff * diff2hash; + share_size = block_hash == 0. ? 0. : share_hash / block_hash; - rate = ( result ? ( 100. * accepted_share_count / total_submits ) - : ( 100. * rejected_share_count / total_submits ) ); + // update counters for 5 minute summary report + shash_sum += share_hash; + bhash_sum += block_hash; + time_sum += share_time; + submits_sum ++; + latency_sum += latency; - if (use_colors) - { - sres = (result ? CL_GRN "Accepted" CL_WHT : CL_RED "Rejected" CL_WHT ); - } - else - { - sres = (result ? "Accepted" : "Rejected" ); - } + pthread_mutex_unlock( &stats_lock ); - // Contrary to rounding convention 100% means zero rejects, exactly 100%. 
- // Rates > 99% and < 100% (rejects>0) display 99.9%. - if ( result ) - { - rate = 100. * accepted_share_count / total_submits; - if ( rate == 100.0 ) - sprintf( rate_s, "%.0f", rate ); + double share_hash_rate = share_time == 0. ? 0. : share_hash / share_time; + + result ? accepted_share_count++ : rejected_share_count++; + global_hashcount = hashcount; + global_hashrate = hashrate; + + // check for solved block + solved = result && (net_diff > 0.0 ) && ( sharediff >= net_diff ); + solved_block_count += solved ? 1 : 0 ; + + if ( use_colors ) + { + sres = ( solved ? ( CL_MAG "BLOCK SOLVED" CL_WHT ) + : result ? ( CL_GRN "Accepted" CL_WHT ) + : ( CL_RED "Rejected" CL_WHT ) ); + + // colour code the share diff to highlight high value. + if ( solved ) + sprintf( diffstr, "%s%.3g%s", CL_MAG, sharediff, CL_WHT ); + else if ( share_size > 0.01 ) + sprintf( diffstr, "%s%.3g%s", CL_GRN, sharediff, CL_WHT ); + else if ( share_size > 0.001 ) + sprintf( diffstr, "%s%.3g%s", CL_CYN, sharediff, CL_WHT ); + else if ( share_hash_rate > hashrate ) + sprintf( diffstr, "%s%.3g%s", CL_YLW, sharediff, CL_WHT ); else - sprintf( rate_s, "%.1f", ( rate < 99.9 ) ? rate : 99.9 ); + sprintf( diffstr, "%.3g", sharediff ); } else { - rate = 100. * rejected_share_count / total_submits; - if ( rate < 0.1 ) - sprintf( rate_s, "%.1f", 0.10 ); - else - sprintf( rate_s, "%.1f", rate ); + sres = ( solved ? "BLOCK SOLVED" + : result ? "Accepted" : "Rejected" ); + sprintf( diffstr, "%3g", sharediff ); } - scale_hash_for_display ( &hashcount, hc_units ); scale_hash_for_display ( &hashrate, hr_units ); - if ( hc_units[0] ) - { - sprintf(hc, "%.2f", hashcount ); - if ( hashrate < 10 ) - // very low hashrate, add digits - sprintf(hr, "%.4f", hashrate ); - else - sprintf(hr, "%.2f", hashrate ); - } + if ( hashrate < 10. 
) + sprintf(hr, "%.4f", hashrate ); else - { - // no fractions of a hash - sprintf(hc, "%.0f", hashcount ); sprintf(hr, "%.2f", hashrate ); - } - if ( sharediff == 0 ) - { -#if ((defined(_WIN64) || defined(__WINDOWS__))) - applog( LOG_NOTICE, "%s %lu/%lu (%s%%), %s %sH, %s %sH/s", - sres, ( result ? accepted_share_count : rejected_share_count ), - total_submits, rate_s, hc, hc_units, hr, hr_units ); -#else - applog( LOG_NOTICE, "%s %lu/%lu (%s%%), %s %sH, %s %sH/s, %dC", - sres, ( result ? accepted_share_count : rejected_share_count ), - total_submits, rate_s, hc, hc_units, hr, hr_units, - (uint32_t)cpu_temp(0) ); -#endif - } - else + applog( LOG_NOTICE, "%s, diff %s, %.3f secs, A/R/B: %d/%d/%d.", + sres, diffstr, share_time, accepted_share_count, + rejected_share_count, solved_block_count ); + + if ( have_stratum && result && sharediff && net_diff && !opt_quiet ) { -#if ((defined(_WIN64) || defined(__WINDOWS__))) - applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s", - sres, ( result ? accepted_share_count : rejected_share_count ), - total_submits, rate_s, sharediff, sol, hr, hr_units ); -#else - applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s, %dC", - sres, ( result ? accepted_share_count : rejected_share_count ), - total_submits, rate_s, sharediff, sol, hr, hr_units, - (uint32_t)cpu_temp(0) ); -#endif +// double share_hash_rate = share_time == 0. ? 0. : share_hash / share_time; + + scale_hash_for_display ( &share_hash_rate, shr_units ); + if ( share_hash_rate < 10 ) + // very low hashrate, add digits + sprintf( shr, "%.4f", share_hash_rate ); + else + sprintf( shr, "%.2f", share_hash_rate ); + + applog( LOG_NOTICE, "Miner %s %sH/s, Share %s %sH/s, Latency %d ms.", + hr, hr_units, shr, shr_units, latency ); + applog( LOG_NOTICE, "Height %d, Block share %.5f%%.", + stratum.bloc_height, share_size*100. 
); } - if (reason) + if ( reason ) { - applog(LOG_WARNING, "reject reason: %s", reason); + applog( LOG_WARNING, "reject reason: %s", reason ); /* if (strncmp(reason, "low difficulty share", 20) == 0) { @@ -1554,7 +1556,12 @@ static bool get_work(struct thr_info *thr, struct work *work) bool submit_work(struct thr_info *thr, const struct work *work_in) { struct workio_cmd *wc; - /* fill out work request message */ + + memcpy( &prev_submit_time, &submit_time, sizeof submit_time ); + gettimeofday( &submit_time, NULL ); + timeval_subtract( &submit_interval, &submit_time, &prev_submit_time ); + + /* fill out work request message */ wc = (struct workio_cmd *) calloc(1, sizeof(*wc)); if (!wc) return false; @@ -1783,6 +1790,8 @@ static void *miner_thread( void *userdata ) int thr_id = mythr->id; struct work work; uint32_t max_nonce; + struct timeval et; + struct timeval time_now; // end_nonce gets read before being set so it needs to be initialized // what is an appropriate value that is completely neutral? @@ -1991,7 +2000,7 @@ static void *miner_thread( void *userdata ) gettimeofday( (struct timeval *) &tv_start, NULL ); // Scan for nonce - nonce_found = algo_gate.scanhash( thr_id, &work, max_nonce, + nonce_found = algo_gate.scanhash( &work, max_nonce, &hashes_done, mythr ); // record scanhash elapsed time @@ -2001,36 +2010,20 @@ static void *miner_thread( void *userdata ) { pthread_mutex_lock( &stats_lock ); thr_hashcount[thr_id] = hashes_done; - thr_hashrates[thr_id] = - hashes_done / ( diff.tv_sec + diff.tv_usec * 1e-6 ); - pthread_mutex_unlock( &stats_lock ); + thr_hashrates[thr_id] = + hashes_done / ( diff.tv_sec + diff.tv_usec * 1e-6 ); + pthread_mutex_unlock( &stats_lock ); } - // if nonce(s) found submit work if ( nonce_found && !opt_benchmark ) - { // 4 way with multiple nonces, copy individually to work and submit. 
- if ( nonce_found > 1 ) - for ( int n = 0; n < nonce_found; n++ ) + { + if ( !submit_work( mythr, &work ) ) { - *algo_gate.get_nonceptr( work.data ) = work.nonces[n]; - if ( submit_work( mythr, &work ) ) - applog( LOG_NOTICE, "Share submitted." ); - else - { - applog( LOG_WARNING, "Failed to submit share." ); - break; - } + applog( LOG_WARNING, "Failed to submit share." ); + break; } - else - { // only 1 nonce, in work ready to submit. - - if ( !submit_work( mythr, &work ) ) - { - applog( LOG_WARNING, "Failed to submit share." ); - break; - } + if ( !opt_quiet ) applog( LOG_NOTICE, "Share submitted." ); - } // prevent stale work in solo // we can't submit twice a block! @@ -2042,8 +2035,70 @@ static void *miner_thread( void *userdata ) pthread_mutex_unlock( &g_work_lock ); } } + // Check for 5 minute summary report, mutex until global counters + // are read and reset. It's bad form to unlock inside a conditional + // block but more efficient. The logic is reversed to make the mutex + // issue obvious. + pthread_mutex_lock( &stats_lock ); + + gettimeofday( &time_now, NULL ); + timeval_subtract( &et, &time_now, &five_min_start ); + if ( et.tv_sec < 300 ) + pthread_mutex_unlock( &stats_lock ); + else + { + // collect and reset counters + double hash = shash_sum; shash_sum = 0.; + double bhash = bhash_sum; bhash_sum = 0.; + double time = time_sum; time_sum = 0.; + uint64_t submits = submits_sum; submits_sum = 0; + uint64_t latency = latency_sum; latency_sum = 0; + memcpy( &five_min_start, &time_now, sizeof time_now ); + + pthread_mutex_unlock( &stats_lock ); + + char hr[16]; + char hr_units[4] = {0}; + char bshstr[32]; + double hrate = time == 0. ? 0. : hash / time; + double avg_share = bhash == 0. ? 0. : hash / bhash * 100.; + latency = submits ? latency / submits : 0; + + // colour code the block share to highlight high value. 
+ if ( avg_share > 90.0 ) + sprintf( bshstr, "%s%.5f%s", CL_MAG, avg_share, CL_WHT ); + else if ( avg_share > 1.0 ) + sprintf( bshstr, "%s%.5f%s", CL_GRN, avg_share, CL_WHT ); + else if ( avg_share > 0.1 ) + sprintf( bshstr, "%s%.5f%s", CL_CYN, avg_share, CL_WHT ); + else if ( hrate > global_hashrate ) + sprintf( bshstr, "%s%.5f%s", CL_YLW, avg_share, CL_WHT ); + else + sprintf( bshstr, "%.5f", avg_share ); + + scale_hash_for_display ( &hrate, hr_units ); + if ( hrate < 10. ) + // very low hashrate, add digits + sprintf( hr, "%.4f", hrate ); + else + sprintf( hr, "%.2f", hrate ); + + applog(LOG_NOTICE,"Summary: %d submits in %dm%02ds, block share %s%%.", + (uint64_t)submits, et.tv_sec / 60, + et.tv_sec % 60, bshstr ); + +#if ((defined(_WIN64) || defined(__WINDOWS__))) + applog(LOG_NOTICE,"Share hashrate %s %sH/s, latency %d ms.", + hr, hr_units, latency ); +#else + applog(LOG_NOTICE,"Share hashrate %s %sH/s, latency %d ms, temp %dC.", + hr, hr_units, latency, (uint32_t)cpu_temp(0) ); +#endif + + } + // display hashrate - if ( !opt_quiet ) + if ( opt_hash_meter ) { char hc[16]; char hr[16]; @@ -2086,7 +2141,7 @@ static void *miner_thread( void *userdata ) char hc_units[2] = {0,0}; char hr[16]; char hr_units[2] = {0,0}; - scale_hash_for_display( &hashcount, hc_units ); + scale_hash_for_display( &hashcount, hc_units ); scale_hash_for_display( &hashrate, hr_units ); if ( hc_units[0] ) sprintf( hc, "%.2f", hashcount ); @@ -2507,15 +2562,15 @@ static void *stratum_thread(void *userdata ) if ( last_bloc_height != stratum.bloc_height ) { last_bloc_height = stratum.bloc_height; -// if ( !opt_quiet ) -// { + if ( !opt_quiet ) + { if (net_diff > 0.) 
applog(LOG_BLUE, "%s block %d, network diff %.3f", algo_names[opt_algo], stratum.bloc_height, net_diff); else applog(LOG_BLUE, "%s %s block %d", short_url, algo_names[opt_algo], stratum.bloc_height); -// } + } } restart_threads(); } @@ -2887,7 +2942,10 @@ void parse_arg(int key, char *arg ) case 1013: opt_showdiff = false; break; - case 1016: /* --coinbase-addr */ + case 1014: // hash-meter + opt_hash_meter = true; + break; + case 1016: /* --coinbase-addr */ pk_script_size = address_to_script(pk_script, sizeof(pk_script), arg); if (!pk_script_size) { fprintf(stderr, "invalid address -- '%s'\n", arg); @@ -3343,6 +3401,11 @@ int main(int argc, char *argv[]) // All options must be set before starting the gate if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1); + // Initialize stats times and counters + gettimeofday( &prev_submit_time, NULL ); + memcpy( &submit_time, &prev_submit_time, sizeof submit_time ); + memcpy( &five_min_start, &prev_submit_time, sizeof prev_submit_time ); + if ( !check_cpu_capability() ) exit(1); pthread_mutex_init(&stats_lock, NULL); diff --git a/miner.h b/miner.h index ab62bdcf..71ad5463 100644 --- a/miner.h +++ b/miner.h @@ -363,7 +363,6 @@ struct work { char *job_id; size_t xnonce2_len; unsigned char *xnonce2; - uint32_t nonces[8]; // deprecated } __attribute__ ((aligned (64))); struct stratum_job { @@ -521,7 +520,7 @@ enum algos { ALGO_LYRA2RE, ALGO_LYRA2REV2, ALGO_LYRA2REV3, - ALGO_LYRA2Z, + ALGO_LYRA2Z, ALGO_LYRA2Z330, ALGO_M7M, ALGO_MYR_GR, @@ -529,7 +528,7 @@ enum algos { ALGO_NIST5, ALGO_PENTABLAKE, ALGO_PHI1612, - ALGO_PHI2, + ALGO_PHI2, ALGO_PLUCK, ALGO_POLYTIMOS, ALGO_QUARK, @@ -537,14 +536,14 @@ enum algos { ALGO_SCRYPT, ALGO_SCRYPTJANE, ALGO_SHA256D, - ALGO_SHA256T, ALGO_SHA256Q, + ALGO_SHA256T, ALGO_SHAVITE3, ALGO_SKEIN, ALGO_SKEIN2, ALGO_SKUNK, ALGO_SONOA, - ALGO_TIMETRAVEL, + ALGO_TIMETRAVEL, ALGO_TIMETRAVEL10, ALGO_TRIBUS, ALGO_VANILLA, @@ -609,7 +608,7 @@ static const char* const algo_names[] = { "lyra2re", 
"lyra2rev2", "lyra2rev3", - "lyra2z", + "lyra2z", "lyra2z330", "m7m", "myr-gr", @@ -618,21 +617,21 @@ static const char* const algo_names[] = { "pentablake", "phi1612", "phi2", - "pluck", + "pluck", "polytimos", "quark", "qubit", "scrypt", "scryptjane", "sha256d", - "sha256t", "sha256q", + "sha256t", "shavite3", "skein", "skein2", "skunk", "sonoa", - "timetravel", + "timetravel", "timetravel10", "tribus", "vanilla", @@ -711,6 +710,7 @@ extern bool opt_stratum_stats; extern int num_cpus; extern int num_cpugroups; extern int opt_priority; +extern bool opt_hash_meter; extern uint32_t accepted_share_count; extern uint32_t rejected_share_count; extern uint32_t solved_block_count; @@ -767,7 +767,7 @@ Options:\n\ pentablake 5 x blake512\n\ phi1612 phi, LUX coin (original algo)\n\ phi2 LUX (new algo)\n\ - pluck Pluck:128 (Supcoin)\n\ + pluck Pluck:128 (Supcoin)\n\ polytimos\n\ quark Quark\n\ qubit Qubit\n\ @@ -775,14 +775,14 @@ Options:\n\ scrypt:N scrypt(N, 1, 1)\n\ scryptjane:nf\n\ sha256d Double SHA-256\n\ - sha256t Triple SHA-256, Onecoin (OC)\n\ sha256q Quad SHA-256, Pyrite (PYE)\n\ - shavite3 Shavite3\n\ + sha256t Triple SHA-256, Onecoin (OC)\n\ + shavite3 Shavite3\n\ skein Skein+Sha (Skeincoin)\n\ skein2 Double Skein (Woodcoin)\n\ skunk Signatum (SIGT)\n\ sonoa Sono\n\ - timetravel timeravel8, Machinecoin (MAC)\n\ + timetravel timeravel8, Machinecoin (MAC)\n\ timetravel10 Bitcore (BTX)\n\ tribus Denarius (DNR)\n\ vanilla blake256r8vnl (VCash)\n\ @@ -825,6 +825,7 @@ Options:\n\ --randomize Randomize scan range start to reduce duplicates\n\ -f, --diff-factor Divide req. 
difficulty by this factor (std is 1.0)\n\ -m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\ + --hash-meter Display thread hash rates\n\ --hide-diff Do not display changes in difficulty\n\ --coinbase-addr=ADDR payout address for solo mining\n\ --coinbase-sig=TEXT data to insert in the coinbase when possible\n\ @@ -888,6 +889,7 @@ static struct option const options[] = { { "diff-factor", 1, NULL, 'f' }, { "diff", 1, NULL, 'f' }, // deprecated (alias) { "diff-multiplier", 1, NULL, 'm' }, + { "hash-meter", 0, NULL, 1014 }, { "hide-diff", 0, NULL, 1013 }, { "help", 0, NULL, 'h' }, { "no-gbt", 0, NULL, 1011 }, diff --git a/simd-utils/intrlv-avx2.h b/simd-utils/intrlv-avx2.h index 00c95b59..f7a19ab1 100644 --- a/simd-utils/intrlv-avx2.h +++ b/simd-utils/intrlv-avx2.h @@ -355,7 +355,6 @@ static inline void mm256_dintrlv_2x128x256( void *d0, void *d1, // Interleave 8 source buffers containing 32 bit data into the destination // vector -#define mm256_interleave_8x32 mm256_intrlv_8x32 static inline void mm256_intrlv_8x32( void *d, const void *s0, const void *s1, const void *s2, const void *s3, const void *s4, const void *s5, const void *s6, const void *s7, int bit_len ) @@ -396,7 +395,6 @@ static inline void mm256_bswap_intrlv80_8x32( void *d, const void *s ) // Deinterleave 8 buffers of 32 bit data from the source buffer. // Sub-function can be called directly for 32 byte final hash. -#define mm256_deinterleave_8x32 mm256_dintrlv_8x32 static inline void mm256_dintrlv_8x32( void *d0, void *d1, void *d2, void *d3, void *d4, void *d5, void *d6, void *d7, const void *s, int bit_len ) @@ -439,7 +437,6 @@ static inline void mm256_extract_lane_8x32( void *d, const void *s, // Interleave 4 source buffers containing 64 bit data into the destination // buffer. Only bit_len 256, 512, 640 & 1024 are supported. 
-#define mm256_interleave_4x64 mm256_intrlv_4x64 static inline void mm256_intrlv_4x64( void *d, const void *s0, const void *s1, const void *s2, const void *s3, int bit_len ) { @@ -487,7 +484,6 @@ do { \ // Deinterleave 4 buffers of 64 bit data from the source buffer. // bit_len must be 256, 512, 640 or 1024 bits. // Requires overrun padding for 640 bit len. -#define mm256_deinterleave_4x64 mm256_dintrlv_4x64 static inline void mm256_dintrlv_4x64( void *d0, void *d1, void *d2, void *d3, const void *s, int bit_len ) { @@ -521,7 +517,6 @@ static inline void mm256_extract_lane_4x64( void *d, const void *s, // Convert from 4x32 SSE2 interleaving to 4x64 AVX2. // Can't do it in place -#define mm256_reinterleave_4x32_4x64 mm256_rintrlv_4x32_4x64 static inline void mm256_rintrlv_4x32_4x64( void *dst, void *src, int bit_len ) { @@ -559,7 +554,6 @@ static inline void mm256_rintrlv_4x32_4x64( void *dst, void *src, // Convert 4x64 byte (256 bit) vectors to 4x32 (128 bit) vectors for AVX // bit_len must be multiple of 64 -#define mm256_reinterleave_4x64_4x32 mm256_rintrlv_4x64_4x32 static inline void mm256_rintrlv_4x64_4x32( void *dst, void *src, int bit_len ) { @@ -595,7 +589,6 @@ static inline void mm256_rintrlv_4x64_4x32( void *dst, void *src, // bit_len == 1024 } -#define mm256_reinterleave_4x64_2x128 mm256_rintrlv_4x64_2x128 static inline void mm256_rintrlv_4x64_2x128( void *dst0, void *dst1, const void *src, int bit_len ) { @@ -632,7 +625,6 @@ static inline void mm256_rintrlv_4x64_2x128( void *dst0, void *dst1, d1[7] = _mm256_set_epi64x( s[63], s[59], s[62], s[58] ); } -#define mm256_reinterleave_2x128_4x64 mm256_rintrlv_2x128_4x64 static inline void mm256_rintrlv_2x128_4x64( void *dst, const void *src0, const void *src1, int bit_len ) { @@ -666,7 +658,6 @@ static inline void mm256_rintrlv_2x128_4x64( void *dst, const void *src0, } -#define mm256_interleave_2x128 mm256_intrlv_2x128 static inline void mm256_intrlv_2x128( const void *d, const void *s0, void *s1, const int 
bit_len ) { @@ -709,7 +700,6 @@ static inline void mm256_intrlv_2x128( const void *d, const void *s0, casti_m256i( d,7 ) = mm256_concat_128( s1hi, s0hi ); } -#define mm256_deinterleave_2x128 mm256_dintrlv_2x128 static inline void mm256_dintrlv_2x128( void *dst0, void *dst1, const void *s, int bit_len ) { diff --git a/simd-utils/intrlv-sse2.h b/simd-utils/intrlv-sse2.h index a28b873a..65ad84ef 100644 --- a/simd-utils/intrlv-sse2.h +++ b/simd-utils/intrlv-sse2.h @@ -99,7 +99,6 @@ do { \ // interleave 4 arrays of 32 bit elements for 128 bit processing // bit_len must be 256, 512 or 640 bits. -#define mm128_interleave_4x32 mm128_intrlv_4x32 static inline void mm128_intrlv_4x32( void *d, const void *s0, const void *s1, const void *s2, const void *s3, int bit_len ) { @@ -127,7 +126,6 @@ static inline void mm128_intrlv_4x32( void *d, const void *s0, // Still used by decred due to odd data size: 180 bytes // bit_len must be multiple of 32 -#define mm128_interleave_4x32x mm128_intrlv_4x32x static inline void mm128_intrlv_4x32x( void *dst, void *src0, void *src1, void *src2, void *src3, int bit_len ) { @@ -146,7 +144,6 @@ static inline void mm128_intrlv_4x32x( void *dst, void *src0, void *src1, } } -#define mm128_deinterleave_4x32 mm128_dintrlv_4x32 static inline void mm128_dintrlv_4x32( void *d0, void *d1, void *d2, void *d3, const void *s, int bit_len ) {