diff --git a/Makefile.am b/Makefile.am index 8d944d7b..c3a999d2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -85,6 +85,7 @@ cpuminer_SOURCES = \ algo/groestl/aes_ni/hash-groestl.c \ algo/groestl/aes_ni/hash-groestl256.c \ algo/fugue/sph_fugue.c \ + algo/fugue/fugue-aesni.c \ algo/hamsi/sph_hamsi.c \ algo/hamsi/hamsi-hash-4way.c \ algo/haval/haval.c \ diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 1fd50fa7..4c6e60f7 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -44,7 +44,7 @@ Please include the following information: 1. CPU model, operating system, cpuminer-opt version (must be latest), binary file for Windows, changes to default build procedure for Linux. -2. Exact comand line (except user and pw) and intial output showing +2. Exact command line (except user and pw) and initial output showing the above requested info. 3. Additional program output showing any error messages or other @@ -65,6 +65,12 @@ If not what makes it happen or not happen? Change Log ---------- +v3.15.0 + +Fugue optimized with AES, improves many sha3 algos. +Minotaur algo optimized for all architectures. +Fixed neoscrypt BUG log. + v3.14.3 #265: more mutex changes to reduce blocking with high thread count. diff --git a/algo/fugue/fugue-aesni.c b/algo/fugue/fugue-aesni.c new file mode 100644 index 00000000..dde1b21c --- /dev/null +++ b/algo/fugue/fugue-aesni.c @@ -0,0 +1,567 @@ +/* + * file : fugue_vperm.c + * version : 1.0.208 + * date : 14.12.2010 + * + * - vperm and aes_ni implementations of hash function Fugue + * - implements NIST hash api + * - assumes that message length is multiple of 8-bits + * - _FUGUE_VPERM_ must be defined if compiling with ../main.c + * - default version is vperm, define AES_NI for aes_ni version + * + * Cagdas Calik + * ccalik@metu.edu.tr + * Institute of Applied Mathematics, Middle East Technical University, Turkey. 
+ * + */ + +#if defined(__AES__) + +#include <x86intrin.h> + +#include <string.h> +#include "fugue-aesni.h" + + +MYALIGN const unsigned long long _supermix1a[] = {0x0202010807020100, 0x0a05000f06010c0b}; +MYALIGN const unsigned long long _supermix1b[] = {0x0b0d080703060504, 0x0e0a090c050e0f0a}; +MYALIGN const unsigned long long _supermix1c[] = {0x0402060c070d0003, 0x090a060580808080}; +MYALIGN const unsigned long long _supermix1d[] = {0x808080800f0e0d0c, 0x0f0e0d0c80808080}; +MYALIGN const unsigned long long _supermix2a[] = {0x07020d0880808080, 0x0b06010c050e0f0a}; +MYALIGN const unsigned long long _supermix4a[] = {0x000f0a050c0b0601, 0x0302020404030e09}; +MYALIGN const unsigned long long _supermix4b[] = {0x07020d08080e0d0d, 0x07070908050e0f0a}; +MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x0302000007060504}; +MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104}; +MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06}; +MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E}; +MYALIGN const unsigned int _maskd3n[] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}; +MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2}; +MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8}; +MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5}; +MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; +MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c}; +MYALIGN const unsigned int _zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000}; +MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000}; +MYALIGN const unsigned 
int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303}; + + +MYALIGN const unsigned int _IV512[] = { + 0x00000000, 0x00000000, 0x7ea50788, 0x00000000, + 0x75af16e6, 0xdbe4d3c5, 0x27b09aac, 0x00000000, + 0x17f115d9, 0x54cceeb6, 0x0b02e806, 0x00000000, + 0xd1ef924a, 0xc9e2c6aa, 0x9813b2dd, 0x00000000, + 0x3858e6ca, 0x3f207f43, 0xe778ea25, 0x00000000, + 0xd6dd1f95, 0x1dd16eda, 0x67353ee1, 0x00000000}; + +#if defined(__SSE4_1__) + +#define PACK_S0(s0, s1, t1)\ + s0 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s0), _mm_castsi128_ps(s1), 0x30)) + +#define UNPACK_S0(s0, s1, t1)\ + s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\ + s0 = _mm_and_si128(s0, M128(_maskd3n)) + +#define CMIX(s1, s2, r1, r2, t1, t2)\ + t1 = s1;\ + t1 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(t1), _mm_castsi128_ps(s2), _MM_SHUFFLE(3, 0, 2, 1)));\ + r1 = _mm_xor_si128(r1, t1);\ + r2 = _mm_xor_si128(r2, t1); + +#else // SSE2 + +#define PACK_S0(s0, s1, t1)\ + t1 = _mm_shuffle_epi32(s1, _MM_SHUFFLE(0, 3, 3, 3));\ + s0 = _mm_xor_si128(s0, t1); + +#define UNPACK_S0(s0, s1, t1)\ + t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\ + s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\ + s0 = _mm_and_si128(s0, M128(_maskd3n)) + +#define CMIX(s1, s2, r1, r2, t1, t2)\ + t1 = _mm_shuffle_epi32(s1, 0xf9);\ + t2 = _mm_shuffle_epi32(s2, 0xcf);\ + t1 = _mm_xor_si128(t1, t2);\ + r1 = _mm_xor_si128(r1, t1);\ + r2 = _mm_xor_si128(r2, t1) + +#endif + +#define TIX256(msg, s10, s8, s24, s0, t1, t2, t3)\ + t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\ + s10 = _mm_xor_si128(s10, t1);\ + t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\ + s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\ + t1 = _mm_slli_si128(t1, 8);\ + s8 = _mm_xor_si128(s8, t1);\ + t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\ + s0 = _mm_xor_si128(s0, t1) + + +#define TIX384(msg, s16, s8, s27, s30, s0, s4, t1, 
t2, t3)\ + t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\ + s16 = _mm_xor_si128(s16, t1);\ + t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\ + s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\ + t1 = _mm_slli_si128(t1, 8);\ + s8 = _mm_xor_si128(s8, t1);\ + t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\ + s0 = _mm_xor_si128(s0, t1);\ + t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\ + s4 = _mm_xor_si128(s4, t1) + +#define TIX512(msg, s22, s8, s24, s27, s30, s0, s4, s7, t1, t2, t3)\ + t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\ + s22 = _mm_xor_si128(s22, t1);\ + t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\ + s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\ + t1 = _mm_slli_si128(t1, 8);\ + s8 = _mm_xor_si128(s8, t1);\ + t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\ + s0 = _mm_xor_si128(s0, t1);\ + t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\ + s4 = _mm_xor_si128(s4, t1);\ + t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\ + s7 = _mm_xor_si128(s7, t1) + + +#define PRESUPERMIX(x, t1, s1, s2, t2)\ + s1 = x;\ + s2 = _mm_add_epi8(x, x);\ + t2 = _mm_add_epi8(s2, s2);\ + t1 = _mm_srli_epi16(x, 6);\ + t1 = _mm_and_si128(t1, M128(_lsbmask2));\ + s2 = _mm_xor_si128(s2, _mm_shuffle_epi8(M128(_mul2mask), t1));\ + x = _mm_xor_si128(t2, _mm_shuffle_epi8(M128(_mul4mask), t1)) + +#define SUBSTITUTE(r0, _t1, _t2, _t3, _t0)\ + _t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\ + _t2 = _mm_aesenclast_si128(_t2, M128(_zero)) + +#define SUPERMIX(t0, t1, t2, t3, t4)\ + PRESUPERMIX(t0, t1, t2, t3, t4);\ + POSTSUPERMIX(t0, t1, t2, t3, t4) + + +#define POSTSUPERMIX(t0, t1, t2, t3, t4)\ + t1 = t2;\ + t1 = _mm_shuffle_epi8(t1, M128(_supermix1b));\ + t4 = t1;\ + t1 = _mm_shuffle_epi8(t1, M128(_supermix1c));\ + t4 = _mm_xor_si128(t4, t1);\ + t1 = t4;\ + t1 = _mm_shuffle_epi8(t1, M128(_supermix1d));\ + t4 = _mm_xor_si128(t4, t1);\ + t1 = t2;\ + t1 = _mm_shuffle_epi8(t1, 
M128(_supermix1a));\ + t4 = _mm_xor_si128(t4, t1);\ + t2 = _mm_xor_si128(t2, t3);\ + t2 = _mm_xor_si128(t2, t0);\ + t2 = _mm_shuffle_epi8(t2, M128(_supermix7a));\ + t4 = _mm_xor_si128(t4, t2);\ + t2 = _mm_shuffle_epi8(t2, M128(_supermix7b));\ + t4 = _mm_xor_si128(t4, t2);\ + t3 = _mm_shuffle_epi8(t3, M128(_supermix2a));\ + t1 = t0;\ + t1 = _mm_shuffle_epi8(t1, M128(_supermix4a));\ + t4 = _mm_xor_si128(t4, t1);\ + t0 = _mm_shuffle_epi8(t0, M128(_supermix4b));\ + t0 = _mm_xor_si128(t0, t3);\ + t4 = _mm_xor_si128(t4, t0);\ + t0 = _mm_shuffle_epi8(t0, M128(_supermix4c));\ + t4 = _mm_xor_si128(t4, t0) + + +#define SUBROUND512_3(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d)\ + CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\ + PACK_S0(r1c, r1a, _t0);\ + SUBSTITUTE(r1c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r1c);\ + _t0 = _mm_shuffle_epi32(r1c, 0x39);\ + r2c = _mm_xor_si128(r2c, _t0);\ + _t0 = _mm_and_si128(_t0, M128(_maskd3n));\ + r2d = _mm_xor_si128(r2d, _t0);\ + UNPACK_S0(r1c, r1a, _t3);\ + SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r2c);\ + _t0 = _mm_shuffle_epi32(r2c, 0x39);\ + r3c = _mm_xor_si128(r3c, _t0);\ + _t0 = _mm_and_si128(_t0, M128(_maskd3n));\ + r3d = _mm_xor_si128(r3d, _t0);\ + UNPACK_S0(r2c, r2a, _t3);\ + SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r3c);\ + UNPACK_S0(r3c, r3a, _t3) + + +#define SUBROUND512_4(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d, r4a, r4b, r4c, r4d)\ + CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\ + PACK_S0(r1c, r1a, _t0);\ + SUBSTITUTE(r1c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r1c);\ + _t0 = _mm_shuffle_epi32(r1c, 0x39);\ + r2c = _mm_xor_si128(r2c, _t0);\ + _t0 = _mm_and_si128(_t0, M128(_maskd3n));\ + r2d = _mm_xor_si128(r2d, _t0);\ + UNPACK_S0(r1c, r1a, _t3);\ + SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r2c);\ + _t0 = _mm_shuffle_epi32(r2c, 0x39);\ + r3c = _mm_xor_si128(r3c, _t0);\ + _t0 = _mm_and_si128(_t0, 
M128(_maskd3n));\ + r3d = _mm_xor_si128(r3d, _t0);\ + UNPACK_S0(r2c, r2a, _t3);\ + SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r3c);\ + _t0 = _mm_shuffle_epi32(r3c, 0x39);\ + r4c = _mm_xor_si128(r4c, _t0);\ + _t0 = _mm_and_si128(_t0, M128(_maskd3n));\ + r4d = _mm_xor_si128(r4d, _t0);\ + UNPACK_S0(r3c, r3a, _t3);\ + SUBSTITUTE(r4c, _t1, _t2, _t3, _t0);\ + SUPERMIX(_t2, _t3, _t0, _t1, r4c);\ + UNPACK_S0(r4c, r4a, _t3) + + + +#define LOADCOLUMN(x, s, a)\ + block[0] = col[(base + a + 0) % s];\ + block[1] = col[(base + a + 1) % s];\ + block[2] = col[(base + a + 2) % s];\ + block[3] = col[(base + a + 3) % s];\ + x = _mm_load_si128((__m128i*)block) + +#define STORECOLUMN(x, s)\ + _mm_store_si128((__m128i*)block, x);\ + col[(base + 0) % s] = block[0];\ + col[(base + 1) % s] = block[1];\ + col[(base + 2) % s] = block[2];\ + col[(base + 3) % s] = block[3] + +void Compress512(hashState_fugue *ctx, const unsigned char *pmsg, unsigned int uBlockCount) +{ + __m128i _t0, _t1, _t2, _t3; + + switch(ctx->base) + { + case 1: + TIX512( pmsg, ctx->state[3], ctx->state[10], ctx->state[4], + ctx->state[5], ctx->state[ 6], ctx->state[8], + ctx->state[9], ctx->state[10], _t0, _t1, _t2 ); + + SUBROUND512_4( ctx->state[8], ctx->state[9], ctx->state[7], + ctx->state[1], ctx->state[7], ctx->state[8], + ctx->state[6], ctx->state[0], ctx->state[6], + ctx->state[7], ctx->state[5], ctx->state[11], + ctx->state[5], ctx->state[6], ctx->state[4], + ctx->state[10] ); + ctx->base++; + pmsg += 4; + uBlockCount--; + if( uBlockCount == 0 ) break; + + case 2: + TIX512( pmsg, ctx->state[11], ctx->state[6], ctx->state[0], + ctx->state[ 1], ctx->state[2], ctx->state[4], + ctx->state[ 5], ctx->state[6], _t0, _t1, _t2); + + SUBROUND512_4( ctx->state[4], ctx->state[5], ctx->state[3], + ctx->state[9], ctx->state[3], ctx->state[4], + ctx->state[2], ctx->state[8], ctx->state[2], + ctx->state[3], ctx->state[1], ctx->state[7], + ctx->state[1], ctx->state[2], ctx->state[0], + ctx->state[6]); + 
+ + ctx->base = 0; + pmsg += 4; + uBlockCount--; + break; + } + + + while( uBlockCount > 0 ) /* steady state: three unrolled rounds, each consuming one 4-byte word; base wraps to 0 after the third */ + { + TIX512( pmsg, ctx->state[ 7], ctx->state[2], ctx->state[8], ctx->state[9], + ctx->state[10], ctx->state[0], ctx->state[1], ctx->state[2], + _t0, _t1, _t2 ); + SUBROUND512_4( ctx->state[0], ctx->state[1], ctx->state[11], + ctx->state[5], ctx->state[11], ctx->state[0], + ctx->state[10], ctx->state[4], ctx->state[10], + ctx->state[11], ctx->state[9], ctx->state[3], + ctx->state[9], ctx->state[10], ctx->state[8], + ctx->state[2] ); + + ctx->base++; + pmsg += 4; + uBlockCount--; + if( uBlockCount == 0 ) break; + + TIX512( pmsg, ctx->state[3], ctx->state[10], ctx->state[4], ctx->state[5], + ctx->state[6], ctx->state[8], ctx->state[9], ctx->state[10], + _t0, _t1, _t2 ); + + SUBROUND512_4( ctx->state[8], ctx->state[9], ctx->state[7], ctx->state[1], ctx->state[7], ctx->state[8], ctx->state[6], ctx->state[0], + ctx->state[6], ctx->state[7], ctx->state[5], ctx->state[11], + ctx->state[5], ctx->state[6], ctx->state[4], ctx->state[10]); /* same operand list as the 'case 1' entry round */ + + ctx->base++; + pmsg += 4; + uBlockCount--; + if( uBlockCount == 0 ) break; + + TIX512( pmsg, ctx->state[11], ctx->state[6], ctx->state[0], ctx->state[1], + ctx->state[2], ctx->state[4], ctx->state[5], ctx->state[6], + _t0, _t1, _t2); + SUBROUND512_4( ctx->state[4], ctx->state[5], ctx->state[3], ctx->state[9], + ctx->state[3], ctx->state[4], ctx->state[2], ctx->state[8], + ctx->state[2], ctx->state[3], ctx->state[1], ctx->state[7], + ctx->state[1], ctx->state[2], ctx->state[0], ctx->state[6]); + + ctx->base = 0; + pmsg += 4; + uBlockCount--; + } + +} + +void Final512(hashState_fugue *ctx, BitSequence *hashval) +{ + unsigned int block[4] __attribute__ ((aligned (32))); + unsigned int col[36] __attribute__ ((aligned (16))); + unsigned int i, base; + __m128i r0, _t0, _t1, _t2, _t3; + + for(i = 0; i < 12; i++) + { + _mm_store_si128((__m128i*)block, ctx->state[i]); + + col[3 * i + 0] = block[0]; + col[3 * i + 1] = block[1]; + col[3 * i + 2] = block[2]; 
+ } + + base = (36 - (12 * ctx->base)) % 36; + + for(i = 0; i < 32; i++) + { + // ROR3 + base = (base + 33) % 36; + + // CMIX + col[(base + 0) % 36] ^= col[(base + 4) % 36]; + col[(base + 1) % 36] ^= col[(base + 5) % 36]; + col[(base + 2) % 36] ^= col[(base + 6) % 36]; + col[(base + 18) % 36] ^= col[(base + 4) % 36]; + col[(base + 19) % 36] ^= col[(base + 5) % 36]; + col[(base + 20) % 36] ^= col[(base + 6) % 36]; + + // SMIX + LOADCOLUMN(r0, 36, 0); + SUBSTITUTE(r0, _t1, _t2, _t3, _t0); + SUPERMIX(_t2, _t3, _t0, _t1, r0); + STORECOLUMN(r0, 36); + } + + for(i = 0; i < 13; i++) + { + // S4 += S0; S9 += S0; S18 += S0; S27 += S0; + col[(base + 4) % 36] ^= col[(base + 0) % 36]; + col[(base + 9) % 36] ^= col[(base + 0) % 36]; + col[(base + 18) % 36] ^= col[(base + 0) % 36]; + col[(base + 27) % 36] ^= col[(base + 0) % 36]; + + // ROR9 + base = (base + 27) % 36; + + // SMIX + LOADCOLUMN(r0, 36, 0); + SUBSTITUTE(r0, _t1, _t2, _t3, _t0); + SUPERMIX(_t2, _t3, _t0, _t1, r0); + STORECOLUMN(r0, 36); + + // S4 += S0; S10 += S0; S18 += S0; S27 += S0; + col[(base + 4) % 36] ^= col[(base + 0) % 36]; + col[(base + 10) % 36] ^= col[(base + 0) % 36]; + col[(base + 18) % 36] ^= col[(base + 0) % 36]; + col[(base + 27) % 36] ^= col[(base + 0) % 36]; + + // ROR9 + base = (base + 27) % 36; + + // SMIX + LOADCOLUMN(r0, 36, 0); + SUBSTITUTE(r0, _t1, _t2, _t3, _t0); + SUPERMIX(_t2, _t3, _t0, _t1, r0); + STORECOLUMN(r0, 36); + + // S4 += S0; S10 += S0; S19 += S0; S27 += S0; + col[(base + 4) % 36] ^= col[(base + 0) % 36]; + col[(base + 10) % 36] ^= col[(base + 0) % 36]; + col[(base + 19) % 36] ^= col[(base + 0) % 36]; + col[(base + 27) % 36] ^= col[(base + 0) % 36]; + + // ROR9 + base = (base + 27) % 36; + + // SMIX + LOADCOLUMN(r0, 36, 0); + SUBSTITUTE(r0, _t1, _t2, _t3, _t0); + SUPERMIX(_t2, _t3, _t0, _t1, r0); + STORECOLUMN(r0, 36); + + // S4 += S0; S10 += S0; S19 += S0; S28 += S0; + col[(base + 4) % 36] ^= col[(base + 0) % 36]; + col[(base + 10) % 36] ^= col[(base + 0) % 36]; + col[(base + 
19) % 36] ^= col[(base + 0) % 36]; + col[(base + 28) % 36] ^= col[(base + 0) % 36]; + + // ROR8 + base = (base + 28) % 36; + + // SMIX + LOADCOLUMN(r0, 36, 0); + SUBSTITUTE(r0, _t1, _t2, _t3, _t0); + SUPERMIX(_t2, _t3, _t0, _t1, r0); + STORECOLUMN(r0, 36); + } + + // S4 += S0; S9 += S0; S18 += S0; S27 += S0; + col[(base + 4) % 36] ^= col[(base + 0) % 36]; + col[(base + 9) % 36] ^= col[(base + 0) % 36]; + col[(base + 18) % 36] ^= col[(base + 0) % 36]; + col[(base + 27) % 36] ^= col[(base + 0) % 36]; + + // Transform to the standard basis and store output; S1 || S2 || S3 || S4 + LOADCOLUMN(r0, 36, 1); + _mm_store_si128((__m128i*)hashval, r0); + + // Transform to the standard basis and store output; S9 || S10 || S11 || S12 + LOADCOLUMN(r0, 36, 9); + _mm_store_si128((__m128i*)hashval + 1, r0); + + // Transform to the standard basis and store output; S18 || S19 || S20 || S21 + LOADCOLUMN(r0, 36, 18); + _mm_store_si128((__m128i*)hashval + 2, r0); + + // Transform to the standard basis and store output; S27 || S28 || S29 || S30 + LOADCOLUMN(r0, 36, 27); + _mm_store_si128((__m128i*)hashval + 3, r0); +} + +HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize) +{ + int i; + ctx->processed_bits = 0; + ctx->uBufferBytes = 0; + ctx->base = 0; + + + ctx->uHashSize = 512; + ctx->uBlockLength = 4; + + for(i = 0; i < 6; i++) + ctx->state[i] = _mm_setzero_si128(); + + ctx->state[6] = _mm_load_si128((__m128i*)_IV512 + 0); + ctx->state[7] = _mm_load_si128((__m128i*)_IV512 + 1); + ctx->state[8] = _mm_load_si128((__m128i*)_IV512 + 2); + ctx->state[9] = _mm_load_si128((__m128i*)_IV512 + 3); + ctx->state[10] = _mm_load_si128((__m128i*)_IV512 + 4); + ctx->state[11] = _mm_load_si128((__m128i*)_IV512 + 5); + + return SUCCESS; +} + + +HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen) +{ + unsigned int uByteLength, uBlockCount, uRemainingBytes; + + uByteLength = (unsigned int)(databitlen / 8); + + if(state->uBufferBytes + uByteLength >= 
state->uBlockLength) + { + if(state->uBufferBytes != 0) + { + // Fill the buffer + memcpy(state->buffer + state->uBufferBytes, (void*)data, state->uBlockLength - state->uBufferBytes); + + // Process the buffer + Compress512(state, state->buffer, 1); + + state->processed_bits += state->uBlockLength * 8; + data += state->uBlockLength - state->uBufferBytes; + uByteLength -= state->uBlockLength - state->uBufferBytes; + } + + // buffer now does not contain any unprocessed bytes + + uBlockCount = uByteLength / state->uBlockLength; + uRemainingBytes = uByteLength % state->uBlockLength; + + if(uBlockCount > 0) + { + Compress512(state, data, uBlockCount); + + state->processed_bits += uBlockCount * state->uBlockLength * 8; + data += uBlockCount * state->uBlockLength; + } + + if(uRemainingBytes > 0) + { + memcpy(state->buffer, (void*)data, uRemainingBytes); + } + + state->uBufferBytes = uRemainingBytes; + } + else + { + memcpy(state->buffer + state->uBufferBytes, (void*)data, uByteLength); + state->uBufferBytes += uByteLength; + } + + return SUCCESS; +} + +HashReturn fugue512_Final(hashState_fugue *state, void *hashval) +{ + unsigned int i; + BitSequence lengthbuf[8] __attribute__((aligned(64))); + + // Update message bit count + state->processed_bits += state->uBufferBytes * 8; + + // Pad the remaining buffer bytes with zero + if(state->uBufferBytes != 0) + { + if ( state->uBufferBytes != state->uBlockLength) + memset(state->buffer + state->uBufferBytes, 0, state->uBlockLength - state->uBufferBytes); + + Compress512(state, state->buffer, 1); + } + + // Last two blocks are message length in bits + for(i = 0; i < 8; i++) + lengthbuf[i] = ((state->processed_bits) >> (8 * (7 - i))) & 0xff; + + // Process the last two blocks + Compress512(state, lengthbuf, 2); + + // Finalization + Final512(state, hashval); + + return SUCCESS; +} + + +HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen) +{ + fugue512_Init(hs, 512); + 
+ fugue512_Update(hs, data, databitlen*8); /* NOTE: despite its name, databitlen is a byte count in this wrapper (callers pass 64 for a 64-byte input); fugue512_Update expects bits */ + fugue512_Final(hs, hashval); + return SUCCESS; +} + +#endif // AES diff --git a/algo/fugue/fugue-aesni.h b/algo/fugue/fugue-aesni.h new file mode 100644 index 00000000..92a0a2ef --- /dev/null +++ b/algo/fugue/fugue-aesni.h @@ -0,0 +1,46 @@ +/* + * file : hash_api.h + * version : 1.0.208 + * date : 14.12.2010 + * + * Fugue vperm implementation Hash API + * + * Cagdas Calik + * ccalik@metu.edu.tr + * Institute of Applied Mathematics, Middle East Technical University, Turkey. + * + */ + +#ifndef FUGUE_HASH_API_H +#define FUGUE_HASH_API_H + +#if defined(__AES__) + +#include "algo/sha/sha3_common.h" +#include <x86intrin.h> + + +typedef struct +{ + __m128i state[12]; + unsigned int base; + + unsigned int uHashSize; + unsigned int uBlockLength; + unsigned int uBufferBytes; + DataLength processed_bits; + BitSequence buffer[4]; + +} hashState_fugue __attribute__ ((aligned (64))); + +HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen); + +HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen); + +HashReturn fugue512_Final(hashState_fugue *state, void *hashval); + +HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen); /* one-shot Init + Update + Final; here databitlen is a length in BYTES */ + +#endif // AES +#endif // FUGUE_HASH_API_H + diff --git a/algo/quark/hmq1725-4way.c b/algo/quark/hmq1725-4way.c index 8832ea79..e742efa8 100644 --- a/algo/quark/hmq1725-4way.c +++ b/algo/quark/hmq1725-4way.c @@ -16,7 +16,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/haval-hash-4way.h" @@ -40,7 +40,7 @@ union _hmq1725_8way_context_overlay cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context 
shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -363,14 +363,14 @@ extern void hmq1725_8way_hash(void *state, const void *input) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -459,21 +459,21 @@ extern void hmq1725_8way_hash(void *state, const void *input) m512_zero ); if ( hash0[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); if ( hash1[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); if ( hash2[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); if ( hash3[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); if ( hash4[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); if ( hash5[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); if ( hash6[0] & 
mask ) - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); if ( hash7[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x64_512( vhashA, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -628,7 +628,7 @@ union _hmq1725_4way_context_overlay simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -846,10 +846,10 @@ extern void hmq1725_4way_hash(void *state, const void *input) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); // In this situation serial simd seems to be faster. 
@@ -920,13 +920,13 @@ extern void hmq1725_4way_hash(void *state, const void *input) h_mask = _mm256_movemask_epi8( vh_mask ); if ( hash0[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); if ( hash1[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); if ( hash2[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); if ( hash3[0] & mask ) - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 ); diff --git a/algo/quark/hmq1725.c b/algo/quark/hmq1725.c index ea1ca623..41e3cfcb 100644 --- a/algo/quark/hmq1725.c +++ b/algo/quark/hmq1725.c @@ -21,9 +21,11 @@ #if defined(__AES__) #include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/echo/aes_ni/hash_api.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" @@ -40,7 +42,6 @@ typedef struct { sph_shavite512_context shavite1, shavite2; hashState_sd simd1, simd2; sph_hamsi512_context hamsi1; - sph_fugue512_context fugue1, fugue2; sph_shabal512_context shabal1; sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4; SHA512_CTX sha1, sha2; @@ -48,9 +49,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo1, echo2; hashState_groestl groestl1, groestl2; + hashState_fugue fugue1, fugue2; #else sph_groestl512_context groestl1, groestl2; sph_echo512_context echo1, echo2; + sph_fugue512_context fugue1, fugue2; #endif } hmq1725_ctx_holder; @@ -88,8 +91,13 @@ void init_hmq1725_ctx() sph_hamsi512_init(&hmq1725_ctx.hamsi1); +#if defined(__AES__) + fugue512_Init( &hmq1725_ctx.fugue1, 512 ); + fugue512_Init( &hmq1725_ctx.fugue2, 512 ); +#else 
sph_fugue512_init(&hmq1725_ctx.fugue1); sph_fugue512_init(&hmq1725_ctx.fugue2); +#endif sph_shabal512_init(&hmq1725_ctx.shabal1); @@ -235,8 +243,13 @@ extern void hmq1725hash(void *state, const void *input) sph_hamsi512 (&h_ctx.hamsi1, hashA, 64); //3 sph_hamsi512_close(&h_ctx.hamsi1, hashB); //4 +#if defined(__AES__) + fugue512_Update( &h_ctx.fugue1, hashB, 512 ); //2 //// + fugue512_Final( &h_ctx.fugue1, hashA ); //3 +#else sph_fugue512 (&h_ctx.fugue1, hashB, 64); //2 //// sph_fugue512_close(&h_ctx.fugue1, hashA); //3 +#endif if ( hashA[0] & mask ) //4 { @@ -262,8 +275,13 @@ extern void hmq1725hash(void *state, const void *input) if ( hashB[0] & mask ) //7 { +#if defined(__AES__) + fugue512_Update( &h_ctx.fugue2, hashB, 512 ); // + fugue512_Final( &h_ctx.fugue2, hashA ); //8 +#else sph_fugue512 (&h_ctx.fugue2, hashB, 64); // sph_fugue512_close(&h_ctx.fugue2, hashA); //8 +#endif } else { diff --git a/algo/scrypt/neoscrypt.c b/algo/scrypt/neoscrypt.c index 9003e59b..7cb4c828 100644 --- a/algo/scrypt/neoscrypt.c +++ b/algo/scrypt/neoscrypt.c @@ -1051,16 +1051,16 @@ int scanhash_neoscrypt( struct work *work, uint32_t _ALIGN(64) hash[8]; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; while (pdata[19] < max_nonce && !work_restart[thr_id].restart) { neoscrypt((uint8_t *) hash, (uint8_t *) pdata ); /* Quick hash check */ - if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) { - *hashes_done = pdata[19] - first_nonce + 1; - return 1; + if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) + { + submit_solution( work, hash, mythr ); } pdata[19]++; diff --git a/algo/x13/phi1612-4way.c b/algo/x13/phi1612-4way.c index 192b081a..389c87c5 100644 --- a/algo/x13/phi1612-4way.c +++ b/algo/x13/phi1612-4way.c @@ -7,7 +7,7 @@ #include "algo/jh/jh-hash-4way.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/cubehash/cube-hash-2way.h" -#include "algo/fugue/sph_fugue.h" 
+#include "algo/fugue/fugue-aesni.h" #include "algo/gost/sph_gost.h" #include "algo/echo/aes_ni/hash_api.h" #if defined(__VAES__) @@ -20,7 +20,7 @@ typedef struct { skein512_8way_context skein; jh512_8way_context jh; cube_4way_context cube; - sph_fugue512_context fugue; + hashState_fugue fugue; sph_gost512_context gost; #if defined(__VAES__) echo_4way_context echo; @@ -36,7 +36,7 @@ void init_phi1612_8way_ctx() skein512_8way_init( &phi1612_8way_ctx.skein ); jh512_8way_init( &phi1612_8way_ctx.jh ); cube_4way_init( &phi1612_8way_ctx.cube, 512, 16, 32 ); - sph_fugue512_init( &phi1612_8way_ctx.fugue ); + fugue512_Init( &phi1612_8way_ctx.fugue, 512 ); sph_gost512_init( &phi1612_8way_ctx.gost ); #if defined(__VAES__) echo_4way_init( &phi1612_8way_ctx.echo, 512 ); @@ -79,29 +79,14 @@ void phi1612_8way_hash( void *state, const void *input ) dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); // Fugue - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( 
&ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); // Gost sph_gost512( &ctx.gost, hash0, 64 ); @@ -223,7 +208,7 @@ typedef struct { skein512_4way_context skein; jh512_4way_context jh; cubehashParam cube; - sph_fugue512_context fugue; + hashState_fugue fugue; sph_gost512_context gost; hashState_echo echo; } phi1612_4way_ctx_holder; @@ -235,7 +220,6 @@ void init_phi1612_4way_ctx() skein512_4way_init( &phi1612_4way_ctx.skein ); jh512_4way_init( &phi1612_4way_ctx.jh ); cubehashInit( &phi1612_4way_ctx.cube, 512, 16, 32 ); - sph_fugue512_init( &phi1612_4way_ctx.fugue ); sph_gost512_init( &phi1612_4way_ctx.gost ); init_echo( &phi1612_4way_ctx.echo, 512 ); }; @@ -275,17 +259,10 @@ void phi1612_4way_hash( void *state, const void *input ) cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 ); // Fugue - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); // Gost sph_gost512( &ctx.gost, hash0, 64 ); diff --git a/algo/x13/phi1612.c b/algo/x13/phi1612.c index bbb86bd4..13c79a3e 100644 --- a/algo/x13/phi1612.c +++ b/algo/x13/phi1612.c @@ -8,24 +8,28 @@ #include #include "algo/gost/sph_gost.h" #include "algo/echo/sph_echo.h" -#include "algo/fugue/sph_fugue.h" #include "algo/cubehash/cubehash_sse2.h" #include 
"algo/skein/sph_skein.h" #include "algo/jh/sph_jh.h" #ifdef __AES__ #include "algo/echo/aes_ni/hash_api.h" + #include "algo/fugue/fugue-aesni.h" +#else + #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { sph_skein512_context skein; sph_jh512_context jh; cubehashParam cube; - sph_fugue512_context fugue; sph_gost512_context gost; #ifdef __AES__ hashState_echo echo; + hashState_fugue fugue; #else sph_echo512_context echo; + sph_fugue512_context fugue; #endif } phi_ctx_holder; @@ -42,8 +46,10 @@ void init_phi1612_ctx() sph_gost512_init( &phi_ctx.gost ); #ifdef __AES__ init_echo( &phi_ctx.echo, 512 ); + fugue512_Init( &phi_ctx.fugue, 512 ); #else sph_echo512_init( &phi_ctx.echo ); + sph_fugue512_init( &phi_ctx.fugue ); #endif } @@ -69,8 +75,13 @@ void phi1612_hash(void *output, const void *input) cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 ); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512( &ctx.fugue, (const void*)hash, 64 ); sph_fugue512_close( &ctx.fugue, (void*)hash ); +#endif sph_gost512( &ctx.gost, hash, 64 ); sph_gost512_close( &ctx.gost, hash ); diff --git a/algo/x13/skunk-4way.c b/algo/x13/skunk-4way.c index e4698395..73d0205f 100644 --- a/algo/x13/skunk-4way.c +++ b/algo/x13/skunk-4way.c @@ -5,7 +5,7 @@ #include #include "algo/skein/skein-hash-4way.h" #include "algo/gost/sph_gost.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/cubehash/cube-hash-2way.h" @@ -14,7 +14,7 @@ typedef struct { skein512_8way_context skein; cube_4way_context cube; - sph_fugue512_context fugue; + hashState_fugue fugue; sph_gost512_context gost; } skunk_8way_ctx_holder; @@ -46,29 +46,15 @@ void skunk_8way_hash( void *output, const void *input ) cube_4way_init( &ctx.cube, 512, 16, 32 ); cube_4way_update_close( &ctx.cube, vhash, vhash, 64 ); dintrlv_4x128_512( hash4, 
hash5, hash6, hash7, vhash ); - - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); + + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); sph_gost512( &ctx.gost, hash0, 64 ); sph_gost512_close( &ctx.gost, output ); @@ -140,7 +126,6 @@ bool skunk_8way_thread_init() { skein512_8way_init( &skunk_8way_ctx.skein ); cube_4way_init( &skunk_8way_ctx.cube, 512, 16, 32 ); - sph_fugue512_init( &skunk_8way_ctx.fugue ); sph_gost512_init( &skunk_8way_ctx.gost ); return true; } @@ -150,7 +135,7 @@ bool skunk_8way_thread_init() typedef struct { skein512_4way_context skein; cubehashParam cube; - sph_fugue512_context fugue; + hashState_fugue fugue; sph_gost512_context gost; } skunk_4way_ctx_holder; @@ -178,17 +163,10 @@ void skunk_4way_hash( void *output, const void *input ) memcpy( &ctx.cube, &skunk_4way_ctx.cube, 
sizeof(cubehashParam) ); cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 ); - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); sph_gost512( &ctx.gost, hash0, 64 ); sph_gost512_close( &ctx.gost, hash0 ); @@ -252,7 +230,6 @@ bool skunk_4way_thread_init() { skein512_4way_init( &skunk_4way_ctx.skein ); cubehashInit( &skunk_4way_ctx.cube, 512, 16, 32 ); - sph_fugue512_init( &skunk_4way_ctx.fugue ); sph_gost512_init( &skunk_4way_ctx.gost ); return true; } diff --git a/algo/x13/skunk-gate.c b/algo/x13/skunk-gate.c index 98bcf682..ab6c4ec8 100644 --- a/algo/x13/skunk-gate.c +++ b/algo/x13/skunk-gate.c @@ -2,7 +2,7 @@ bool register_skunk_algo( algo_gate_t* gate ) { - gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; + gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | AES_OPT; #if defined (SKUNK_8WAY) gate->miner_thread_init = (void*)&skunk_8way_thread_init; gate->scanhash = (void*)&scanhash_skunk_8way; diff --git a/algo/x13/skunk.c b/algo/x13/skunk.c index 41728657..25549ff6 100644 --- a/algo/x13/skunk.c +++ b/algo/x13/skunk.c @@ -8,13 +8,21 @@ #include #include "algo/gost/sph_gost.h" #include "algo/skein/sph_skein.h" -#include "algo/fugue/sph_fugue.h" #include "algo/cubehash/cubehash_sse2.h" +#if defined(__AES__) + #include "algo/fugue/fugue-aesni.h" +#else + #include "algo/fugue/sph_fugue.h" +#endif typedef struct { sph_skein512_context skein; cubehashParam 
cube; +#if defined(__AES__) + hashState_fugue fugue; +#else sph_fugue512_context fugue; +#endif sph_gost512_context gost; } skunk_ctx_holder; @@ -32,8 +40,13 @@ void skunkhash( void *output, const void *input ) cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 ); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512( &ctx.fugue, hash, 64 ); sph_fugue512_close( &ctx.fugue, hash ); +#endif sph_gost512( &ctx.gost, hash, 64 ); sph_gost512_close( &ctx.gost, hash ); @@ -87,8 +100,12 @@ bool skunk_thread_init() { sph_skein512_init( &skunk_ctx.skein ); cubehashInit( &skunk_ctx.cube, 512, 16, 32 ); - sph_fugue512_init( &skunk_ctx.fugue ); - sph_gost512_init( &skunk_ctx.gost ); +#if defined(__AES__) + fugue512_Init( &skunk_ctx.fugue, 512 ); +#else + sph_fugue512_init( &skunk_ctx.fugue ); +#endif + sph_gost512_init( &skunk_ctx.gost ); return true; } #endif diff --git a/algo/x13/x13-4way.c b/algo/x13/x13-4way.c index 6518655a..f2b0d739 100644 --- a/algo/x13/x13-4way.c +++ b/algo/x13/x13-4way.c @@ -16,7 +16,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #if defined(__VAES__) #include "algo/groestl/groestl512-hash-4way.h" #include "algo/shavite/shavite-hash-4way.h" @@ -35,7 +35,7 @@ typedef struct { cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; #if defined(__VAES__) groestl512_4way_context groestl; shavite512_4way_context shavite; @@ -60,7 +60,7 @@ void init_x13_8way_ctx() cube_4way_init( &x13_8way_ctx.cube, 512, 16, 32 ); simd_4way_init( &x13_8way_ctx.simd, 512 ); hamsi512_8way_init( &x13_8way_ctx.hamsi ); - sph_fugue512_init( &x13_8way_ctx.fugue ); + fugue512_Init( &x13_8way_ctx.fugue, 512 ); #if defined(__VAES__) groestl512_4way_init( &x13_8way_ctx.groestl, 64 
); shavite512_4way_init( &x13_8way_ctx.shavite ); @@ -255,29 +255,29 @@ void x13_8way_hash( void *state, const void *input ) vhash ); // 13 Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, 
hash4, 512 ); + fugue512_Final( &ctx.fugue, hash4 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash5, 512 ); + fugue512_Final( &ctx.fugue, hash5 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash6, 512 ); + fugue512_Final( &ctx.fugue, hash6 ); + memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash7, 512 ); + fugue512_Final( &ctx.fugue, hash7 ); memcpy( state, hash0, 32 ); memcpy( state+ 32, hash1, 32 ); @@ -344,7 +344,7 @@ typedef struct { simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; } x13_4way_ctx_holder; x13_4way_ctx_holder x13_4way_ctx __attribute__ ((aligned (64))); @@ -363,7 +363,7 @@ void init_x13_4way_ctx() simd_2way_init( &x13_4way_ctx.simd, 512 ); init_echo( &x13_4way_ctx.echo, 512 ); hamsi512_4way_init( &x13_4way_ctx.hamsi ); - sph_fugue512_init( &x13_4way_ctx.fugue ); + fugue512_Init( &x13_4way_ctx.fugue, 512 ); }; void x13_4way_hash( void *state, const void *input ) @@ -477,17 +477,17 @@ void x13_4way_hash( void *state, const void *input ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 13 Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x13_4way_ctx.fugue, 
sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); memcpy( state, hash0, 32 ); memcpy( state+32, hash1, 32 ); diff --git a/algo/x13/x13.c b/algo/x13/x13.c index 109729e0..ca66e007 100644 --- a/algo/x13/x13.c +++ b/algo/x13/x13.c @@ -13,7 +13,6 @@ #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" @@ -21,9 +20,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -32,9 +33,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -44,7 +47,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; } x13_ctx_holder; x13_ctx_holder x13_ctx; @@ -56,9 +58,11 @@ void init_x13_ctx() #if defined(__AES__) init_groestl( &x13_ctx.groestl, 64 ); init_echo( &x13_ctx.echo, 512 ); + fugue512_Init( &x13_ctx.fugue, 512 ); #else sph_groestl512_init( &x13_ctx.groestl ); sph_echo512_init( &x13_ctx.echo ); + sph_fugue512_init( &x13_ctx.fugue ); #endif sph_skein512_init( &x13_ctx.skein ); sph_jh512_init( &x13_ctx.jh ); 
@@ -68,7 +72,6 @@ void init_x13_ctx() sph_shavite512_init( &x13_ctx.shavite ); init_sd( &x13_ctx.simd, 512 ); sph_hamsi512_init( &x13_ctx.hamsi ); - sph_fugue512_init( &x13_ctx.fugue ); }; void x13hash(void *output, const void *input) @@ -84,11 +87,9 @@ void x13hash(void *output, const void *input) sph_bmw512_close( &ctx.bmw, hash ); #if defined(__AES__) - init_groestl( &ctx.groestl, 64 ); update_and_final_groestl( &ctx.groestl, (char*)hash, (const char*)hash, 512 ); #else - sph_groestl512_init( &ctx.groestl ); sph_groestl512( &ctx.groestl, hash, 64 ); sph_groestl512_close( &ctx.groestl, hash ); #endif @@ -125,8 +126,13 @@ void x13hash(void *output, const void *input) sph_hamsi512( &ctx.hamsi, hash, 64 ); sph_hamsi512_close( &ctx.hamsi, hash ); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512( &ctx.fugue, hash, 64 ); - sph_fugue512_close( &ctx.fugue, hash ); + sph_fugue512_close( &ctx.fugue, hash ); +#endif memcpy( output, hash, 32 ); } diff --git a/algo/x13/x13bcd-4way.c b/algo/x13/x13bcd-4way.c index 188b18bc..0f978e56 100644 --- a/algo/x13/x13bcd-4way.c +++ b/algo/x13/x13bcd-4way.c @@ -16,7 +16,7 @@ #include "algo/echo/aes_ni/hash_api.h" #include "algo/sm3/sm3-hash-4way.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #if defined(__VAES__) #include "algo/groestl/groestl512-hash-4way.h" #include "algo/shavite/shavite-hash-4way.h" @@ -35,7 +35,7 @@ typedef struct { simd_4way_context simd; sm3_8way_ctx_t sm3; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; #if defined(__VAES__) groestl512_4way_context groestl; shavite512_4way_context shavite; @@ -61,7 +61,7 @@ void init_x13bcd_8way_ctx() simd_4way_init( &x13bcd_8way_ctx.simd, 512 ); sm3_8way_init( &x13bcd_8way_ctx.sm3 ); hamsi512_8way_init( &x13bcd_8way_ctx.hamsi ); - sph_fugue512_init( &x13bcd_8way_ctx.fugue ); + fugue512_Init( 
&x13bcd_8way_ctx.fugue, 512 ); #if defined(__VAES__) groestl512_4way_init( &x13bcd_8way_ctx.groestl, 64 ); shavite512_4way_init( &x13bcd_8way_ctx.shavite ); @@ -257,36 +257,30 @@ void x13bcd_8way_hash( void *state, const void *input ) hash4, hash5, hash6, hash7, vhash ); // Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, state ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, state+32 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, state+64 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, state+96 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, state+128 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, state+160 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, state+192 ); - memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, state+224 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, state ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, state+32 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, state+64 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, 
sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, state+96 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash4, 512 ); + fugue512_Final( &ctx.fugue, state+128 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash5, 512 ); + fugue512_Final( &ctx.fugue, state+160 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash6, 512 ); + fugue512_Final( &ctx.fugue, state+192 ); + memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash7, 512 ); + fugue512_Final( &ctx.fugue, state+224 ); + } int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce, @@ -346,7 +340,7 @@ typedef struct { hashState_echo echo; sm3_4way_ctx_t sm3; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; } x13bcd_4way_ctx_holder; x13bcd_4way_ctx_holder x13bcd_4way_ctx __attribute__ ((aligned (64))); @@ -366,7 +360,7 @@ void init_x13bcd_4way_ctx() init_echo( &x13bcd_4way_ctx.echo, 512 ); sm3_4way_init( &x13bcd_4way_ctx.sm3 ); hamsi512_4way_init( &x13bcd_4way_ctx.hamsi ); - sph_fugue512_init( &x13bcd_4way_ctx.fugue ); + fugue512_Init( &x13bcd_4way_ctx.fugue, 512 ); }; void x13bcd_4way_hash( void *state, const void *input ) @@ -489,20 +483,17 @@ void x13bcd_4way_hash( void *state, const void *input ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, 
&x13bcd_4way_ctx.fugue, - sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); memcpy( state, hash0, 32 ); memcpy( state+32, hash1, 32 ); diff --git a/algo/x13/x13bcd.c b/algo/x13/x13bcd.c index d53e37b1..c397bb74 100644 --- a/algo/x13/x13bcd.c +++ b/algo/x13/x13bcd.c @@ -14,16 +14,17 @@ #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -32,9 +33,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -43,7 +46,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sm3_ctx_t sm3; } x13bcd_ctx_holder; @@ -56,9 +58,11 @@ void init_x13bcd_ctx() #if defined(__AES__) init_groestl( &x13bcd_ctx.groestl, 64 ); init_echo( &x13bcd_ctx.echo, 512 ); 
+ fugue512_Init( &x13bcd_ctx.fugue, 512 ); #else sph_groestl512_init( &x13bcd_ctx.groestl ); sph_echo512_init( &x13bcd_ctx.echo ); + sph_fugue512_init( &x13bcd_ctx.fugue ); #endif sph_skein512_init( &x13bcd_ctx.skein ); sph_jh512_init( &x13bcd_ctx.jh ); @@ -68,7 +72,6 @@ void init_x13bcd_ctx() init_sd( &x13bcd_ctx.simd,512 ); sm3_init( &x13bcd_ctx.sm3 ); sph_hamsi512_init( &x13bcd_ctx.hamsi ); - sph_fugue512_init( &x13bcd_ctx.fugue ); }; void x13bcd_hash(void *output, const void *input) @@ -129,8 +132,13 @@ void x13bcd_hash(void *output, const void *input) sph_hamsi512( &ctx.hamsi, hash, 64 ); sph_hamsi512_close( &ctx.hamsi, hash ); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512( &ctx.fugue, hash, 64 ); sph_fugue512_close( &ctx.fugue, hash ); +#endif memcpy( output, hash, 32 ); } diff --git a/algo/x14/x14-4way.c b/algo/x14/x14-4way.c index 5ac8d8a5..e81b901a 100644 --- a/algo/x14/x14-4way.c +++ b/algo/x14/x14-4way.c @@ -17,7 +17,7 @@ #include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/sph_echo.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #if defined(__VAES__) #include "algo/groestl/groestl512-hash-4way.h" @@ -37,7 +37,7 @@ typedef struct { cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; #if defined(__VAES__) groestl512_4way_context groestl; @@ -63,7 +63,7 @@ void init_x14_8way_ctx() cube_4way_init( &x14_8way_ctx.cube, 512, 16, 32 ); simd_4way_init( &x14_8way_ctx.simd, 512 ); hamsi512_8way_init( &x14_8way_ctx.hamsi ); - sph_fugue512_init( &x14_8way_ctx.fugue ); + fugue512_Init( &x14_8way_ctx.fugue, 512 ); shabal512_8way_init( &x14_8way_ctx.shabal ); #if defined(__VAES__) groestl512_4way_init( &x14_8way_ctx.groestl, 64 ); @@ -259,29 +259,29 @@ void 
x14_8way_hash( void *state, const void *input ) vhash ); // 13 Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash4, 512 ); + fugue512_Final( &ctx.fugue, hash4 ); + memcpy( &ctx.fugue, 
&x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash5, 512 ); + fugue512_Final( &ctx.fugue, hash5 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash6, 512 ); + fugue512_Final( &ctx.fugue, hash6 ); + memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash7, 512 ); + fugue512_Final( &ctx.fugue, hash7 ); // 14 Shabal, parallel 32 bit intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -348,7 +348,7 @@ typedef struct { simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; } x14_4way_ctx_holder; @@ -368,7 +368,7 @@ void init_x14_4way_ctx() simd_2way_init( &x14_4way_ctx.simd, 512 ); init_echo( &x14_4way_ctx.echo, 512 ); hamsi512_4way_init( &x14_4way_ctx.hamsi ); - sph_fugue512_init( &x14_4way_ctx.fugue ); + fugue512_Init( &x14_4way_ctx.fugue, 512 ); shabal512_4way_init( &x14_4way_ctx.shabal ); }; @@ -483,17 +483,17 @@ void x14_4way_hash( void *state, const void *input ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 13 Fugue serial - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( 
&ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); // 14 Shabal, parallel 32 bit intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); diff --git a/algo/x14/x14.c b/algo/x14/x14.c index 83eab967..fdbcacb7 100644 --- a/algo/x14/x14.c +++ b/algo/x14/x14.c @@ -13,7 +13,6 @@ #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" @@ -21,9 +20,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -32,9 +33,11 @@ typedef struct { #if defined(__AES__) hashState_groestl groestl; hashState_echo echo; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -44,7 +47,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; } x14_ctx_holder; @@ -57,9 +59,11 @@ void init_x14_ctx() #if defined(__AES__) init_groestl( &x14_ctx.groestl, 64 ); init_echo( &x14_ctx.echo, 512 ); + fugue512_Init( &x14_ctx.fugue, 512 ); #else sph_groestl512_init( &x14_ctx.groestl ); sph_echo512_init( &x14_ctx.echo ); + sph_fugue512_init( &x14_ctx.fugue ); #endif sph_skein512_init( &x14_ctx.skein ); sph_jh512_init( &x14_ctx.jh ); @@ -69,7 +73,6 @@ void init_x14_ctx() 
sph_shavite512_init( &x14_ctx.shavite ); init_sd( &x14_ctx.simd,512 ); sph_hamsi512_init( &x14_ctx.hamsi ); - sph_fugue512_init( &x14_ctx.fugue ); sph_shabal512_init( &x14_ctx.shabal ); }; @@ -125,8 +128,13 @@ void x14hash(void *output, const void *input) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512(&ctx.fugue, hash, 64); sph_fugue512_close(&ctx.fugue, hash); +#endif sph_shabal512( &ctx.shabal, hash, 64 ); sph_shabal512_close( &ctx.shabal, hash ); diff --git a/algo/x15/x15-4way.c b/algo/x15/x15-4way.c index 7fff408a..281f87ba 100644 --- a/algo/x15/x15-4way.c +++ b/algo/x15/x15-4way.c @@ -17,7 +17,7 @@ #include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/sph_echo.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #if defined(__VAES__) @@ -38,7 +38,7 @@ typedef struct { cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; #if defined(__VAES__) @@ -65,7 +65,7 @@ void init_x15_8way_ctx() cube_4way_init( &x15_8way_ctx.cube, 512, 16, 32 ); simd_4way_init( &x15_8way_ctx.simd, 512 ); hamsi512_8way_init( &x15_8way_ctx.hamsi ); - sph_fugue512_init( &x15_8way_ctx.fugue ); + fugue512_Init( &x15_8way_ctx.fugue, 512 ); shabal512_8way_init( &x15_8way_ctx.shabal ); sph_whirlpool_init( &x15_8way_ctx.whirlpool ); #if defined(__VAES__) @@ -260,30 +260,29 @@ void x15_8way_hash( void *state, const void *input ) vhash ); // 13 Fugue - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - 
sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); - + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash4, 512 ); + fugue512_Final( &ctx.fugue, hash4 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash5, 512 ); + fugue512_Final( &ctx.fugue, hash5 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash6, 512 ); + fugue512_Final( &ctx.fugue, 
hash6 ); + memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash7, 512 ); + fugue512_Final( &ctx.fugue, hash7 ); // 14 Shabal, parallel 32 bit intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -387,7 +386,7 @@ typedef struct { simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; } x15_4way_ctx_holder; @@ -408,7 +407,7 @@ void init_x15_4way_ctx() simd_2way_init( &x15_4way_ctx.simd, 512 ); init_echo( &x15_4way_ctx.echo, 512 ); hamsi512_4way_init( &x15_4way_ctx.hamsi ); - sph_fugue512_init( &x15_4way_ctx.fugue ); + fugue512_Init( &x15_4way_ctx.fugue, 512 ); shabal512_4way_init( &x15_4way_ctx.shabal ); sph_whirlpool_init( &x15_4way_ctx.whirlpool ); }; @@ -524,17 +523,17 @@ void x15_4way_hash( void *state, const void *input ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); // 13 Fugue - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_Update( &ctx.fugue, hash0, 512 ); + fugue512_Final( &ctx.fugue, hash0 ); + memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash1, 512 ); + fugue512_Final( &ctx.fugue, hash1 ); + memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash2, 512 ); + fugue512_Final( &ctx.fugue, hash2 ); + memcpy( &ctx.fugue, 
&x15_4way_ctx.fugue, sizeof(hashState_fugue) ); + fugue512_Update( &ctx.fugue, hash3, 512 ); + fugue512_Final( &ctx.fugue, hash3 ); // 14 Shabal, parallel 32 bit intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); diff --git a/algo/x15/x15.c b/algo/x15/x15.c index c739e910..73d64db9 100644 --- a/algo/x15/x15.c +++ b/algo/x15/x15.c @@ -23,9 +23,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -34,9 +36,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -46,7 +50,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; } x15_ctx_holder; @@ -60,9 +63,11 @@ void init_x15_ctx() #if defined(__AES__) init_groestl( &x15_ctx.groestl, 64 ); init_echo( &x15_ctx.echo, 512 ); + fugue512_Init( &x15_ctx.fugue, 512 ); #else sph_groestl512_init( &x15_ctx.groestl ); sph_echo512_init( &x15_ctx.echo ); + sph_fugue512_init( &x15_ctx.fugue ); #endif sph_skein512_init( &x15_ctx.skein ); sph_jh512_init( &x15_ctx.jh ); @@ -72,7 +77,6 @@ void init_x15_ctx() sph_shavite512_init( &x15_ctx.shavite ); init_sd( &x15_ctx.simd, 512 ); sph_hamsi512_init( &x15_ctx.hamsi ); - sph_fugue512_init( &x15_ctx.fugue ); sph_shabal512_init( &x15_ctx.shabal ); sph_whirlpool_init( &x15_ctx.whirlpool ); }; @@ -131,8 +135,13 @@ void x15hash(void *output, const void *input) sph_hamsi512( &ctx.hamsi, hash, 64 ); sph_hamsi512_close( &ctx.hamsi, hash ); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + 
fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512( &ctx.fugue, hash, 64 ); sph_fugue512_close( &ctx.fugue, hash ); +#endif sph_shabal512( &ctx.shabal, hash, 64 ); sph_shabal512_close( &ctx.shabal, hash ); diff --git a/algo/x16/hex.c b/algo/x16/hex.c index bb08526c..ada1ca71 100644 --- a/algo/x16/hex.c +++ b/algo/x16/hex.c @@ -6,30 +6,6 @@ */ #include "x16r-gate.h" -#include -#include -#include -#include "algo/blake/sph_blake.h" -#include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/nist.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include -#if defined(__AES__) - #include "algo/echo/aes_ni/hash_api.h" - #include "algo/groestl/aes_ni/hash-groestl.h" -#endif - static void hex_getAlgoString(const uint32_t* prevblock, char *output) { char *sptr = output; @@ -47,34 +23,6 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output) *sptr = '\0'; } -/* -union _hex_context_overlay -{ -#if defined(__AES__) - hashState_echo echo; - hashState_groestl groestl; -#else - sph_groestl512_context groestl; - sph_echo512_context echo; -#endif - sph_blake512_context blake; - sph_bmw512_context bmw; - sph_skein512_context skein; - sph_jh512_context jh; - sph_keccak512_context keccak; - hashState_luffa luffa; - cubehashParam cube; - shavite512_context shavite; - hashState_sd simd; - sph_hamsi512_context hamsi; - sph_fugue512_context fugue; - sph_shabal512_context shabal; - sph_whirlpool_context whirlpool; - SHA512_CTX sha512; -}; -typedef union _hex_context_overlay hex_context_overlay; -*/ - static __thread x16r_context_overlay hex_ctx; int hex_hash( void* output, const void* 
input, int thrid ) @@ -187,8 +135,12 @@ int hex_hash( void* output, const void* input, int thrid ) sph_hamsi512_close( &ctx.hamsi, hash ); break; case FUGUE: +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, in, size ); +#else sph_fugue512_full( &ctx.fugue, hash, in, size ); - break; +#endif + break; case SHABAL: if ( i == 0 ) sph_shabal512( &ctx.shabal, in+64, 16 ); diff --git a/algo/x16/minotaur.c b/algo/x16/minotaur.c index 069bf971..99575640 100644 --- a/algo/x16/minotaur.c +++ b/algo/x16/minotaur.c @@ -15,16 +15,17 @@ #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/echo/sph_echo.h" #include "algo/groestl/sph_groestl.h" + #include "algo/fugue/sph_fugue.h" #endif // Config @@ -34,13 +35,16 @@ typedef struct TortureNode TortureNode; typedef struct TortureGarden TortureGarden; // Graph of hash algos plus SPH contexts -struct TortureGarden { +struct TortureGarden +{ #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else - sph_echo512_context echo; - sph_groestl512_context groestl; + sph_echo512_context echo; + sph_groestl512_context groestl; + sph_fugue512_context fugue; #endif sph_blake512_context blake; sph_bmw512_context bmw; @@ -52,15 +56,13 @@ struct TortureGarden { shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; struct TortureNode { unsigned int algo; - TortureNode *childLeft; - TortureNode *childRight; + TortureNode *child[2]; } nodes[22]; } __attribute__ ((aligned (64))); @@ -97,10 +99,12 @@ static void get_hash( void *output, const 
void *input, TortureGarden *garden, #endif break; case 4: - sph_fugue512_init(&garden->fugue); - sph_fugue512(&garden->fugue, input, 64); - sph_fugue512_close(&garden->fugue, hash); - break; +#if defined(__AES__) + fugue512_full( &garden->fugue, hash, input, 64 ); +#else + sph_fugue512_full( &garden->fugue, hash, input, 64 ); +#endif + break; case 5: #if defined(__AES__) groestl512_full( &garden->groestl, (char*)hash, (char*)input, 512 ); @@ -162,68 +166,61 @@ static void get_hash( void *output, const void *input, TortureGarden *garden, break; } - // Output the hash memcpy(output, hash, 64); } -// Recursively traverse a given torture garden starting with a given hash and given node within the garden. The hash is overwritten with the final hash. -static void traverse_garden( TortureGarden *garden, void *hash, - TortureNode *node ) -{ - unsigned char partialHash[64] __attribute__ ((aligned (64))); - get_hash(partialHash, hash, garden, node->algo); - - if ( partialHash[63] % 2 == 0 ) - { // Last byte of output hash is even - if ( node->childLeft != NULL ) - traverse_garden( garden, partialHash, node->childLeft ); - } - else - { // Last byte of output hash is odd - if ( node->childRight != NULL ) - traverse_garden( garden, partialHash, node->childRight ); - } - - memcpy( hash, partialHash, 64 ); -} - -// Associate child nodes with a parent node -static inline void link_nodes( TortureNode *parent, TortureNode *childLeft, - TortureNode *childRight ) -{ - parent->childLeft = childLeft; - parent->childRight = childRight; -} - static __thread TortureGarden garden; bool initialize_torture_garden() { // Create torture garden nodes. Note that both sides of 19 and 20 lead to 21, and 21 has no children (to make traversal complete). 
- link_nodes(&garden.nodes[0], &garden.nodes[1], &garden.nodes[2]); - link_nodes(&garden.nodes[1], &garden.nodes[3], &garden.nodes[4]); - link_nodes(&garden.nodes[2], &garden.nodes[5], &garden.nodes[6]); - link_nodes(&garden.nodes[3], &garden.nodes[7], &garden.nodes[8]); - link_nodes(&garden.nodes[4], &garden.nodes[9], &garden.nodes[10]); - link_nodes(&garden.nodes[5], &garden.nodes[11], &garden.nodes[12]); - link_nodes(&garden.nodes[6], &garden.nodes[13], &garden.nodes[14]); - link_nodes(&garden.nodes[7], &garden.nodes[15], &garden.nodes[16]); - link_nodes(&garden.nodes[8], &garden.nodes[15], &garden.nodes[16]); - link_nodes(&garden.nodes[9], &garden.nodes[15], &garden.nodes[16]); - link_nodes(&garden.nodes[10], &garden.nodes[15], &garden.nodes[16]); - link_nodes(&garden.nodes[11], &garden.nodes[17], &garden.nodes[18]); - link_nodes(&garden.nodes[12], &garden.nodes[17], &garden.nodes[18]); - link_nodes(&garden.nodes[13], &garden.nodes[17], &garden.nodes[18]); - link_nodes(&garden.nodes[14], &garden.nodes[17], &garden.nodes[18]); - link_nodes(&garden.nodes[15], &garden.nodes[19], &garden.nodes[20]); - link_nodes(&garden.nodes[16], &garden.nodes[19], &garden.nodes[20]); - link_nodes(&garden.nodes[17], &garden.nodes[19], &garden.nodes[20]); - link_nodes(&garden.nodes[18], &garden.nodes[19], &garden.nodes[20]); - link_nodes(&garden.nodes[19], &garden.nodes[21], &garden.nodes[21]); - link_nodes(&garden.nodes[20], &garden.nodes[21], &garden.nodes[21]); - garden.nodes[21].childLeft = NULL; - garden.nodes[21].childRight = NULL; - return true; + + garden.nodes[ 0].child[0] = &garden.nodes[ 1]; + garden.nodes[ 0].child[1] = &garden.nodes[ 2]; + garden.nodes[ 1].child[0] = &garden.nodes[ 3]; + garden.nodes[ 1].child[1] = &garden.nodes[ 4]; + garden.nodes[ 2].child[0] = &garden.nodes[ 5]; + garden.nodes[ 2].child[1] = &garden.nodes[ 6]; + garden.nodes[ 3].child[0] = &garden.nodes[ 7]; + garden.nodes[ 3].child[1] = &garden.nodes[ 8]; + garden.nodes[ 4].child[0] = 
&garden.nodes[ 9]; + garden.nodes[ 4].child[1] = &garden.nodes[10]; + garden.nodes[ 5].child[0] = &garden.nodes[11]; + garden.nodes[ 5].child[1] = &garden.nodes[12]; + garden.nodes[ 6].child[0] = &garden.nodes[13]; + garden.nodes[ 6].child[1] = &garden.nodes[14]; + garden.nodes[ 7].child[0] = &garden.nodes[15]; + garden.nodes[ 7].child[1] = &garden.nodes[16]; + garden.nodes[ 8].child[0] = &garden.nodes[15]; + garden.nodes[ 8].child[1] = &garden.nodes[16]; + garden.nodes[ 9].child[0] = &garden.nodes[15]; + garden.nodes[ 9].child[1] = &garden.nodes[16]; + garden.nodes[10].child[0] = &garden.nodes[15]; + garden.nodes[10].child[1] = &garden.nodes[16]; + garden.nodes[11].child[0] = &garden.nodes[17]; + garden.nodes[11].child[1] = &garden.nodes[18]; + garden.nodes[12].child[0] = &garden.nodes[17]; + garden.nodes[12].child[1] = &garden.nodes[18]; + garden.nodes[13].child[0] = &garden.nodes[17]; + garden.nodes[13].child[1] = &garden.nodes[18]; + garden.nodes[14].child[0] = &garden.nodes[17]; + garden.nodes[14].child[1] = &garden.nodes[18]; + garden.nodes[15].child[0] = &garden.nodes[19]; + garden.nodes[15].child[1] = &garden.nodes[20]; + garden.nodes[16].child[0] = &garden.nodes[19]; + garden.nodes[16].child[1] = &garden.nodes[20]; + garden.nodes[17].child[0] = &garden.nodes[19]; + garden.nodes[17].child[1] = &garden.nodes[20]; + garden.nodes[18].child[0] = &garden.nodes[19]; + garden.nodes[18].child[1] = &garden.nodes[20]; + garden.nodes[19].child[0] = &garden.nodes[21]; + garden.nodes[19].child[1] = &garden.nodes[21]; + garden.nodes[20].child[0] = &garden.nodes[21]; + garden.nodes[20].child[1] = &garden.nodes[21]; + garden.nodes[21].child[0] = NULL; + garden.nodes[21].child[1] = NULL; + + return true; } // Produce a 32-byte hash from 80-byte input data @@ -236,20 +233,67 @@ int minotaur_hash( void *output, const void *input, int thr_id ) SHA512_Update( &garden.sha512, input, 80 ); SHA512_Final( (unsigned char*) hash, &garden.sha512 ); + // algo 6 (Hamsi) is very slow. 
It's faster to skip hashing this nonce + // if Hamsi is needed but only the first and last functions are + // currently known. Abort if either is Hamsi. + if ( ( ( hash[ 0] % MINOTAUR_ALGO_COUNT ) == 6 ) + || ( ( hash[21] % MINOTAUR_ALGO_COUNT ) == 6 ) ) + return 0; + // Assign algos to torture garden nodes based on initial hash for ( int i = 0; i < 22; i++ ) garden.nodes[i].algo = hash[i] % MINOTAUR_ALGO_COUNT; // Send the initial hash through the torture garden - traverse_garden( &garden, hash, &garden.nodes[0] ); + TortureNode *node = &garden.nodes[0]; - memcpy( output, hash, 32 ); + while ( node ) + { + get_hash( hash, hash, &garden, node->algo ); + node = node->child[ hash[63] & 1 ]; + } + memcpy( output, hash, 32 ); return 1; } +int scanhash_minotaur( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t edata[20] __attribute__((aligned(64))); + uint32_t hash[8] __attribute__((aligned(64))); + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 1; + uint32_t n = first_nonce; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + uint64_t skipped = 0; + + mm128_bswap32_80( edata, pdata ); + do + { + edata[19] = n; + if ( likely( algo_gate.hash( hash, edata, thr_id ) ) ) + { + if ( unlikely( valid_hash( hash, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n ); + submit_solution( work, hash, mythr ); + } + } + else skipped++; + n++; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce - skipped; + pdata[19] = n; + return 0; +} + bool register_minotaur_algo( algo_gate_t* gate ) { + gate->scanhash = (void*)&scanhash_minotaur; gate->hash = (void*)&minotaur_hash; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT; gate->miner_thread_init = (void*)&initialize_torture_garden; diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c index 
79559619..abbe16a3 100644 --- a/algo/x16/x16r-4way.c +++ b/algo/x16/x16r-4way.c @@ -347,14 +347,14 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid ) hash7, vhash ); break; case FUGUE: - sph_fugue512_full( &ctx.fugue, hash0, in0, size ); - sph_fugue512_full( &ctx.fugue, hash1, in1, size ); - sph_fugue512_full( &ctx.fugue, hash2, in2, size ); - sph_fugue512_full( &ctx.fugue, hash3, in3, size ); - sph_fugue512_full( &ctx.fugue, hash4, in4, size ); - sph_fugue512_full( &ctx.fugue, hash5, in5, size ); - sph_fugue512_full( &ctx.fugue, hash6, in6, size ); - sph_fugue512_full( &ctx.fugue, hash7, in7, size ); + fugue512_full( &ctx.fugue, hash0, in0, size ); + fugue512_full( &ctx.fugue, hash1, in1, size ); + fugue512_full( &ctx.fugue, hash2, in2, size ); + fugue512_full( &ctx.fugue, hash3, in3, size ); + fugue512_full( &ctx.fugue, hash4, in4, size ); + fugue512_full( &ctx.fugue, hash5, in5, size ); + fugue512_full( &ctx.fugue, hash6, in6, size ); + fugue512_full( &ctx.fugue, hash7, in7, size ); break; case SHABAL: intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7, @@ -747,10 +747,10 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); break; case FUGUE: - sph_fugue512_full( &ctx.fugue, hash0, in0, size ); - sph_fugue512_full( &ctx.fugue, hash1, in1, size ); - sph_fugue512_full( &ctx.fugue, hash2, in2, size ); - sph_fugue512_full( &ctx.fugue, hash3, in3, size ); + fugue512_full( &ctx.fugue, hash0, in0, size ); + fugue512_full( &ctx.fugue, hash1, in1, size ); + fugue512_full( &ctx.fugue, hash2, in2, size ); + fugue512_full( &ctx.fugue, hash3, in3, size ); break; case SHABAL: intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); diff --git a/algo/x16/x16r-gate.h b/algo/x16/x16r-gate.h index 0c373b33..cbd3899c 100644 --- a/algo/x16/x16r-gate.h +++ b/algo/x16/x16r-gate.h @@ -24,6 +24,7 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include 
"algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #endif #if defined (__AVX2__) #include "algo/blake/blake-hash-4way.h" @@ -111,7 +112,7 @@ union _x16r_8way_context_overlay cubehashParam cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -155,7 +156,7 @@ union _x16r_4way_context_overlay shavite512_context shavite; simd_2way_context simd; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -180,9 +181,11 @@ union _x16r_context_overlay #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_blake512_context blake; sph_bmw512_context bmw; @@ -194,7 +197,6 @@ union _x16r_context_overlay shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; diff --git a/algo/x16/x16r.c b/algo/x16/x16r.c index e64feb0e..b8cab348 100644 --- a/algo/x16/x16r.c +++ b/algo/x16/x16r.c @@ -151,8 +151,12 @@ int x16r_hash_generic( void* output, const void* input, int thrid ) sph_hamsi512_close( &ctx.hamsi, hash ); break; case FUGUE: - sph_fugue512_full( &ctx.fugue, hash, in, size ); - break; +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, in, size ); +#else + sph_fugue512_full( &ctx.fugue, hash, in, size ); +#endif + break; case SHABAL: if ( i == 0 ) sph_shabal512( &ctx.shabal, in+64, 16 ); diff --git a/algo/x16/x16rv2-4way.c b/algo/x16/x16rv2-4way.c index 1db130c4..e2d80dab 100644 --- a/algo/x16/x16rv2-4way.c +++ b/algo/x16/x16rv2-4way.c @@ -8,30 +8,7 @@ #include #include #include -#include 
"algo/blake/blake-hash-4way.h" -#include "algo/bmw/bmw-hash-4way.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/skein/skein-hash-4way.h" -#include "algo/jh/jh-hash-4way.h" -#include "algo/keccak/keccak-hash-4way.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa-hash-2way.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/cubehash/cube-hash-2way.h" -#include "algo/simd/simd-hash-2way.h" -#include "algo/echo/aes_ni/hash_api.h" -#include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/shabal-hash-4way.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/sha/sha-hash-4way.h" #include "algo/tiger/sph_tiger.h" -#if defined(__VAES__) - #include "algo/groestl/groestl512-hash-4way.h" - #include "algo/shavite/shavite-hash-4way.h" - #include "algo/echo/echo-hash-4way.h" -#endif #if defined (X16RV2_8WAY) @@ -46,7 +23,7 @@ union _x16rv2_8way_context_overlay cubehashParam cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -432,14 +409,14 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid ) hash7, vhash ); break; case FUGUE: - sph_fugue512_full( &ctx.fugue, hash0, in0, size ); - sph_fugue512_full( &ctx.fugue, hash1, in1, size ); - sph_fugue512_full( &ctx.fugue, hash2, in2, size ); - sph_fugue512_full( &ctx.fugue, hash3, in3, size ); - sph_fugue512_full( &ctx.fugue, hash4, in4, size ); - sph_fugue512_full( &ctx.fugue, hash5, in5, size ); - sph_fugue512_full( &ctx.fugue, hash6, in6, size ); - sph_fugue512_full( &ctx.fugue, hash7, in7, size ); + fugue512_full( &ctx.fugue, hash0, in0, size ); + fugue512_full( &ctx.fugue, hash1, in1, size ); + fugue512_full( &ctx.fugue, hash2, in2, size ); + fugue512_full( &ctx.fugue, hash3, in3, size ); + fugue512_full( &ctx.fugue, 
hash4, in4, size ); + fugue512_full( &ctx.fugue, hash5, in5, size ); + fugue512_full( &ctx.fugue, hash6, in6, size ); + fugue512_full( &ctx.fugue, hash7, in7, size ); break; case SHABAL: intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7, @@ -705,7 +682,7 @@ union _x16rv2_4way_context_overlay shavite512_context shavite; simd_2way_context simd; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -946,10 +923,10 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); break; case FUGUE: - sph_fugue512_full( &ctx.fugue, hash0, in0, size ); - sph_fugue512_full( &ctx.fugue, hash1, in1, size ); - sph_fugue512_full( &ctx.fugue, hash2, in2, size ); - sph_fugue512_full( &ctx.fugue, hash3, in3, size ); + fugue512_full( &ctx.fugue, hash0, in0, size ); + fugue512_full( &ctx.fugue, hash1, in1, size ); + fugue512_full( &ctx.fugue, hash2, in2, size ); + fugue512_full( &ctx.fugue, hash3, in3, size ); break; case SHABAL: intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); diff --git a/algo/x16/x16rv2.c b/algo/x16/x16rv2.c index c0a5eac9..62109329 100644 --- a/algo/x16/x16rv2.c +++ b/algo/x16/x16rv2.c @@ -8,41 +8,18 @@ #if !defined(X16R_8WAY) && !defined(X16R_4WAY) -#include -#include -#include -#include "algo/blake/sph_blake.h" -#include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/nist.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include #include "algo/tiger/sph_tiger.h" -#if 
defined(__AES__) - #include "algo/echo/aes_ni/hash_api.h" - #include "algo/groestl/aes_ni/hash-groestl.h" -#endif - -static __thread uint32_t s_ntime = UINT32_MAX; union _x16rv2_context_overlay { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_blake512_context blake; sph_bmw512_context bmw; @@ -54,7 +31,6 @@ union _x16rv2_context_overlay shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -160,8 +136,12 @@ int x16rv2_hash( void* output, const void* input, int thrid ) sph_hamsi512_close( &ctx.hamsi, hash ); break; case FUGUE: +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, in, size ); +#else sph_fugue512_full( &ctx.fugue, hash, in, size ); - break; +#endif + break; case SHABAL: sph_shabal512_init( &ctx.shabal ); sph_shabal512( &ctx.shabal, in, size ); diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c index a9e6b631..e4fe98b0 100644 --- a/algo/x17/sonoa-4way.c +++ b/algo/x17/sonoa-4way.c @@ -16,7 +16,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/haval-hash-4way.h" @@ -40,7 +40,7 @@ union _sonoa_8way_context_overlay cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -423,14 +423,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - 
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); if ( work_restart[thr_id].restart ) return 0; // 4 @@ -554,14 +554,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -755,14 +755,14 @@ int sonoa_8way_hash( void *state, const 
void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -905,14 +905,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); 
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -1074,14 +1074,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -1134,7 +1134,7 @@ union _sonoa_4way_context_overlay simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -1327,10 +1327,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); if ( 
work_restart[thr_id].restart ) return 0; // 4 @@ -1393,10 +1393,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1496,10 +1496,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1576,10 +1576,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1669,10 +1669,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, 
hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c index 19dbcb7c..4253ad89 100644 --- a/algo/x17/sonoa.c +++ b/algo/x17/sonoa.c @@ -14,7 +14,6 @@ #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/sph-haval.h" @@ -25,9 +24,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -36,9 +37,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -48,7 +51,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -64,9 +66,11 @@ void init_sonoa_ctx() #if defined(__AES__) init_echo( &sonoa_ctx.echo, 512 ); init_groestl( &sonoa_ctx.groestl, 64 ); + fugue512_Init( &sonoa_ctx.fugue, 512 ); #else sph_groestl512_init(&sonoa_ctx.groestl ); sph_echo512_init( &sonoa_ctx.echo ); + sph_fugue512_init( &sonoa_ctx.fugue ); #endif 
sph_skein512_init( &sonoa_ctx.skein); sph_jh512_init( &sonoa_ctx.jh); @@ -76,7 +80,6 @@ void init_sonoa_ctx() sph_shavite512_init( &sonoa_ctx.shavite ); init_sd( &sonoa_ctx.simd, 512 ); sph_hamsi512_init( &sonoa_ctx.hamsi ); - sph_fugue512_init( &sonoa_ctx.fugue ); sph_shabal512_init( &sonoa_ctx.shabal ); sph_whirlpool_init( &sonoa_ctx.whirlpool ); SHA512_Init( &sonoa_ctx.sha512 ); @@ -249,8 +252,13 @@ int sonoa_hash( void *state, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, 512 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512(&ctx.fugue, hash, 64); sph_fugue512_close(&ctx.fugue, hash); +#endif if ( work_restart[thr_id].restart ) return 0; // @@ -311,9 +319,11 @@ int sonoa_hash( void *state, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512(&ctx.fugue, hash, 64); - sph_fugue512_close(&ctx.fugue, hash); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else + sph_fugue512_full( &ctx.fugue, hash, hash, 64 ); +#endif sph_shabal512(&ctx.shabal, hash, 64); sph_shabal512_close(&ctx.shabal, hash); @@ -399,9 +409,11 @@ int sonoa_hash( void *state, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512(&ctx.fugue, hash, 64); - sph_fugue512_close(&ctx.fugue, hash); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else + sph_fugue512_full( &ctx.fugue, hash, hash, 64 ); +#endif sph_shabal512_init( &ctx.shabal ); sph_shabal512(&ctx.shabal, hash, 64); @@ -468,9 +480,11 @@ int sonoa_hash( void *state, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512(&ctx.fugue, hash, 64); - 
sph_fugue512_close(&ctx.fugue, hash); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else + sph_fugue512_full( &ctx.fugue, hash, hash, 64 ); +#endif sph_shabal512_init( &ctx.shabal ); sph_shabal512(&ctx.shabal, hash, 64); @@ -546,9 +560,11 @@ int sonoa_hash( void *state, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512(&ctx.fugue, hash, 64); - sph_fugue512_close(&ctx.fugue, hash); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else + sph_fugue512_full( &ctx.fugue, hash, hash, 64 ); +#endif sph_shabal512_init( &ctx.shabal ); sph_shabal512(&ctx.shabal, hash, 64); diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 4fe98bc2..cce38943 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -21,7 +21,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/haval-hash-4way.h" @@ -49,7 +49,7 @@ union _x17_8way_context_overlay #endif simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -190,14 +190,14 @@ int x17_8way_hash( void *state, const void *input, int thr_id ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); - 
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -250,7 +250,7 @@ union _x17_4way_context_overlay simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -328,10 +328,10 @@ int x17_4way_hash( void *state, const void *input, int thr_id ) dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); diff --git a/algo/x17/x17.c b/algo/x17/x17.c index dbfb1049..e6a9a06a 100644 --- a/algo/x17/x17.c +++ b/algo/x17/x17.c @@ -13,7 +13,6 @@ #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/sph-haval.h" @@ -22,11 +21,13 @@ #include "algo/simd/nist.h" #include #if defined(__AES__) + #include "algo/fugue/fugue-aesni.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" #else 
#include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif union _x17_context_overlay @@ -36,9 +37,11 @@ union _x17_context_overlay #if defined(__AES__) hashState_groestl groestl; hashState_echo echo; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -48,7 +51,6 @@ union _x17_context_overlay sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -122,9 +124,11 @@ int x17_hash(void *output, const void *input, int thr_id ) sph_hamsi512_close( &ctx.hamsi, hash ); // 13 Fugue - sph_fugue512_init( &ctx.fugue ); - sph_fugue512(&ctx.fugue, hash, 64 ); - sph_fugue512_close(&ctx.fugue, hash ); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else + sph_fugue512_full( &ctx.fugue, hash, hash, 64 ); +#endif // X14 Shabal sph_shabal512_init( &ctx.shabal ); diff --git a/algo/x17/xevan-4way.c b/algo/x17/xevan-4way.c index fbf5d26c..beb9df6e 100644 --- a/algo/x17/xevan-4way.c +++ b/algo/x17/xevan-4way.c @@ -16,7 +16,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/echo/aes_ni/hash_api.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/sha/sha-hash-4way.h" @@ -40,7 +40,7 @@ union _xevan_8way_context_overlay cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -192,14 +192,14 @@ int xevan_8way_hash( void *output, const void *input, int thr_id ) dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, 
hash6, hash7, vhash, dataLen<<3 ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen ); + fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); + fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); + fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); + fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); + fugue512_full( &ctx.fugue, hash4, hash4, dataLen ); + fugue512_full( &ctx.fugue, hash5, hash5, dataLen ); + fugue512_full( &ctx.fugue, hash6, hash6, dataLen ); + fugue512_full( &ctx.fugue, hash7, hash7, dataLen ); intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, dataLen<<3 ); @@ -355,14 +355,14 @@ int xevan_8way_hash( void *output, const void *input, int thr_id ) dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash, dataLen<<3 ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); - sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen ); - sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen ); - sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen ); - sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen ); + fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); + fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); + fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); + fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); + fugue512_full( &ctx.fugue, hash4, hash4, dataLen ); + fugue512_full( &ctx.fugue, hash5, hash5, dataLen ); + 
fugue512_full( &ctx.fugue, hash6, hash6, dataLen ); + fugue512_full( &ctx.fugue, hash7, hash7, dataLen ); intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, dataLen<<3 ); @@ -415,7 +415,7 @@ union _xevan_4way_context_overlay simd_2way_context simd; hashState_echo echo; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -498,10 +498,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); + fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); + fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); + fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); + fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); // Parallel 4way 32 bit intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); @@ -595,10 +595,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); - sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); - sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); - sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); - sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); + fugue512_full( &ctx.fugue, hash0, hash0, dataLen ); + fugue512_full( &ctx.fugue, hash1, hash1, dataLen ); + fugue512_full( &ctx.fugue, hash2, hash2, dataLen ); + fugue512_full( &ctx.fugue, hash3, hash3, dataLen ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); diff --git a/algo/x17/xevan.c b/algo/x17/xevan.c index 38bfa564..08ed580e 100644 --- a/algo/x17/xevan.c +++ b/algo/x17/xevan.c @@ -15,7 +15,6 @@ #include 
"algo/shavite/sph_shavite.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/haval/sph-haval.h" @@ -25,9 +24,11 @@ #if defined(__AES__) #include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/echo/aes_ni/hash_api.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif typedef struct { @@ -41,7 +42,6 @@ typedef struct { sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -49,9 +49,11 @@ typedef struct { #if defined(__AES__) hashState_echo echo; hashState_groestl groestl; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif } xevan_ctx_holder; @@ -69,7 +71,6 @@ void init_xevan_ctx() sph_shavite512_init( &xevan_ctx.shavite ); init_sd( &xevan_ctx.simd, 512 ); sph_hamsi512_init( &xevan_ctx.hamsi ); - sph_fugue512_init( &xevan_ctx.fugue ); sph_shabal512_init( &xevan_ctx.shabal ); sph_whirlpool_init( &xevan_ctx.whirlpool ); SHA512_Init( &xevan_ctx.sha512 ); @@ -77,9 +78,11 @@ void init_xevan_ctx() #if defined(__AES__) init_groestl( &xevan_ctx.groestl, 64 ); init_echo( &xevan_ctx.echo, 512 ); + fugue512_Init( &xevan_ctx.fugue, 512 ); #else sph_groestl512_init( &xevan_ctx.groestl ); sph_echo512_init( &xevan_ctx.echo ); + sph_fugue512_init( &xevan_ctx.fugue ); #endif }; @@ -137,8 +140,13 @@ int xevan_hash(void *output, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, dataLen); sph_hamsi512_close(&ctx.hamsi, hash); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, dataLen*8 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512(&ctx.fugue, hash, dataLen); 
sph_fugue512_close(&ctx.fugue, hash); +#endif sph_shabal512(&ctx.shabal, hash, dataLen); sph_shabal512_close(&ctx.shabal, hash); @@ -202,8 +210,13 @@ int xevan_hash(void *output, const void *input, int thr_id ) sph_hamsi512(&ctx.hamsi, hash, dataLen); sph_hamsi512_close(&ctx.hamsi, hash); +#if defined(__AES__) + fugue512_Update( &ctx.fugue, hash, dataLen*8 ); + fugue512_Final( &ctx.fugue, hash ); +#else sph_fugue512(&ctx.fugue, hash, dataLen); sph_fugue512_close(&ctx.fugue, hash); +#endif sph_shabal512(&ctx.shabal, hash, dataLen); sph_shabal512_close(&ctx.shabal, hash); diff --git a/algo/x22/x22i-4way.c b/algo/x22/x22i-4way.c index 8d519ee6..e61d1add 100644 --- a/algo/x22/x22i-4way.c +++ b/algo/x22/x22i-4way.c @@ -13,7 +13,7 @@ #include "algo/simd/simd-hash-2way.h" #include "algo/shavite/sph_shavite.h" #include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/shabal/shabal-hash-4way.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/sha/sha-hash-4way.h" @@ -42,7 +42,7 @@ union _x22i_8way_ctx_overlay cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -225,30 +225,14 @@ int x22i_8way_hash( void *output, const void *input, int thrid ) dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); - sph_fugue512_init( &ctx.fugue ); - 
sph_fugue512( &ctx.fugue, hash4, 64 ); - sph_fugue512_close( &ctx.fugue, hash4 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash5, 64 ); - sph_fugue512_close( &ctx.fugue, hash5 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash6, 64 ); - sph_fugue512_close( &ctx.fugue, hash6 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash7, 64 ); - sph_fugue512_close( &ctx.fugue, hash7 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 ); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); + fugue512_full( &ctx.fugue, hash4, hash4, 64 ); + fugue512_full( &ctx.fugue, hash5, hash5, 64 ); + fugue512_full( &ctx.fugue, hash6, hash6, 64 ); + fugue512_full( &ctx.fugue, hash7, hash7, 64 ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -520,7 +504,7 @@ union _x22i_4way_ctx_overlay shavite512_2way_context shavite; simd_2way_context simd; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -607,18 +591,10 @@ int x22i_4way_hash( void *output, const void *input, int thrid ) hamsi512_4way_close( &ctx.hamsi, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash0, 64 ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash1, 64 ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash2, 64 ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, hash3, 64 ); - sph_fugue512_close( &ctx.fugue, hash3 ); + fugue512_full( &ctx.fugue, hash0, hash0, 64 ); + fugue512_full( &ctx.fugue, hash1, hash1, 64 ); + fugue512_full( &ctx.fugue, hash2, hash2, 64 
); + fugue512_full( &ctx.fugue, hash3, hash3, 64 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); diff --git a/algo/x22/x22i.c b/algo/x22/x22i.c index f3504204..247ea4ae 100644 --- a/algo/x22/x22i.c +++ b/algo/x22/x22i.c @@ -7,9 +7,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include "algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif #include "algo/skein/sph_skein.h" #include "algo/jh/sph_jh.h" @@ -19,7 +21,6 @@ #include "algo/shavite/sph_shavite.h" #include "algo/simd/nist.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include @@ -36,9 +37,11 @@ union _x22i_context_overlay #if defined(__AES__) hashState_groestl groestl; hashState_echo echo; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -48,7 +51,6 @@ union _x22i_context_overlay sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -129,9 +131,13 @@ int x22i_hash( void *output, const void *input, int thrid ) sph_hamsi512(&ctx.hamsi, (const void*) hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); +#if defined(__AES__) + fugue512_full( &ctx.fugue, hash, hash, 64 ); +#else sph_fugue512_init(&ctx.fugue); sph_fugue512(&ctx.fugue, (const void*) hash, 64); sph_fugue512_close(&ctx.fugue, hash); +#endif sph_shabal512_init(&ctx.shabal); sph_shabal512(&ctx.shabal, (const void*) hash, 64); diff --git a/algo/x22/x25x-4way.c b/algo/x22/x25x-4way.c index 3e672af6..1cdea113 100644 --- a/algo/x22/x25x-4way.c +++ b/algo/x22/x25x-4way.c @@ -18,7 +18,7 @@ #include "algo/shavite/sph_shavite.h" 
#include "algo/simd/nist.h" #include "algo/simd/simd-hash-2way.h" -#include "algo/fugue/sph_fugue.h" +#include "algo/fugue/fugue-aesni.h" #include "algo/whirlpool/sph_whirlpool.h" #include "algo/tiger/sph_tiger.h" #include "algo/lyra2/lyra2.h" @@ -72,7 +72,7 @@ union _x25x_8way_ctx_overlay cube_4way_context cube; simd_4way_context simd; hamsi512_8way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_8way_context shabal; sph_whirlpool_context whirlpool; sha512_8way_context sha512; @@ -303,30 +303,15 @@ int x25x_8way_hash( void *output, const void *input, int thrid ) dintrlv_8x64_512( hash0[11], hash1[11], hash2[11], hash3[11], hash4[11], hash5[11], hash6[11], hash7[11], vhash ); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64); - sph_fugue512_close(&ctx.fugue, hash0[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64); - sph_fugue512_close(&ctx.fugue, hash1[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64); - sph_fugue512_close(&ctx.fugue, hash2[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64); - sph_fugue512_close(&ctx.fugue, hash3[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash4[11], 64); - sph_fugue512_close(&ctx.fugue, hash4[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash5[11], 64); - sph_fugue512_close(&ctx.fugue, hash5[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash6[11], 64); - sph_fugue512_close(&ctx.fugue, hash6[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash7[11], 64); - sph_fugue512_close(&ctx.fugue, hash7[12]); + fugue512_full( &ctx.fugue, hash0[12], hash0[11], 64 ); + fugue512_full( &ctx.fugue, hash1[12], hash1[11], 64 ); + fugue512_full( &ctx.fugue, hash2[12], hash2[11], 64 ); + fugue512_full( &ctx.fugue, 
hash3[12], hash3[11], 64 ); + fugue512_full( &ctx.fugue, hash4[12], hash4[11], 64 ); + fugue512_full( &ctx.fugue, hash5[12], hash5[11], 64 ); + fugue512_full( &ctx.fugue, hash6[12], hash6[11], 64 ); + fugue512_full( &ctx.fugue, hash7[12], hash7[11], 64 ); + intrlv_8x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12], hash4[12], hash5[12], hash6[12], hash7[12] ); @@ -652,7 +637,7 @@ union _x25x_4way_ctx_overlay sph_shavite512_context shavite; hashState_sd simd; hamsi512_4way_context hamsi; - sph_fugue512_context fugue; + hashState_fugue fugue; shabal512_4way_context shabal; sph_whirlpool_context whirlpool; sha512_4way_context sha512; @@ -758,18 +743,10 @@ int x25x_4way_hash( void *output, const void *input, int thrid ) hamsi512_4way_close( &ctx.hamsi, vhash ); dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash ); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64); - sph_fugue512_close(&ctx.fugue, hash0[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64); - sph_fugue512_close(&ctx.fugue, hash1[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64); - sph_fugue512_close(&ctx.fugue, hash2[12]); - sph_fugue512_init(&ctx.fugue); - sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64); - sph_fugue512_close(&ctx.fugue, hash3[12]); + fugue512_full( &ctx.fugue, hash0[12], hash0[11], 64 ); + fugue512_full( &ctx.fugue, hash1[12], hash1[11], 64 ); + fugue512_full( &ctx.fugue, hash2[12], hash2[11], 64 ); + fugue512_full( &ctx.fugue, hash3[12], hash3[11], 64 ); intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] ); diff --git a/algo/x22/x25x.c b/algo/x22/x25x.c index b3438099..7855698d 100644 --- a/algo/x22/x25x.c +++ b/algo/x22/x25x.c @@ -7,9 +7,11 @@ #if defined(__AES__) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" + #include "algo/fugue/fugue-aesni.h" #else #include 
"algo/groestl/sph_groestl.h" #include "algo/echo/sph_echo.h" + #include "algo/fugue/sph_fugue.h" #endif #include "algo/skein/sph_skein.h" #include "algo/jh/sph_jh.h" @@ -19,7 +21,6 @@ #include "algo/shavite/sph_shavite.h" #include "algo/simd/nist.h" #include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" #include "algo/whirlpool/sph_whirlpool.h" #include @@ -39,9 +40,11 @@ union _x25x_context_overlay #if defined(__AES__) hashState_groestl groestl; hashState_echo echo; + hashState_fugue fugue; #else sph_groestl512_context groestl; sph_echo512_context echo; + sph_fugue512_context fugue; #endif sph_jh512_context jh; sph_keccak512_context keccak; @@ -51,7 +54,6 @@ union _x25x_context_overlay sph_shavite512_context shavite; hashState_sd simd; sph_hamsi512_context hamsi; - sph_fugue512_context fugue; sph_shabal512_context shabal; sph_whirlpool_context whirlpool; SHA512_CTX sha512; @@ -133,9 +135,13 @@ int x25x_hash( void *output, const void *input, int thrid ) sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64); sph_hamsi512_close(&ctx.hamsi, &hash[11]); +#if defined(__AES__) + fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 ); +#else sph_fugue512_init(&ctx.fugue); sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64); sph_fugue512_close(&ctx.fugue, &hash[12]); +#endif sph_shabal512_init(&ctx.shabal); sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64); diff --git a/build-allarch.sh b/build-allarch.sh index 9f82dd1e..8b022d02 100755 --- a/build-allarch.sh +++ b/build-allarch.sh @@ -51,23 +51,23 @@ mv cpuminer.exe cpuminer-aes-sse42.exe strip -s cpuminer mv cpuminer cpuminer-aes-sse42 -#make clean || echo clean -#rm -f config.status -#CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl -#make -j 8 -#strip -s cpuminer.exe -#mv cpuminer.exe cpuminer-sse42.exe -#strip -s cpuminer -#mv cpuminer cpuminer-sse42 +make clean || echo clean +rm -f config.status +CFLAGS="-O3 -march=corei7 -Wall -fno-common" 
./configure --with-curl +make -j 8 +strip -s cpuminer.exe +mv cpuminer.exe cpuminer-sse42.exe +strip -s cpuminer +mv cpuminer cpuminer-sse42 -#make clean || echo clean -#rm -f config.status -#CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl -#make -j 8 -#strip -s cpuminer.exe -#mv cpuminer.exe cpuminer-ssse3.exe -#strip -s cpuminer -#mv cpuminer cpuminer-ssse3 +make clean || echo clean +rm -f config.status +CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl +make -j 8 +strip -s cpuminer.exe +mv cpuminer.exe cpuminer-ssse3.exe +strip -s cpuminer +mv cpuminer cpuminer-ssse3 make clean || echo clean rm -f config.status diff --git a/clean-all.sh b/clean-all.sh index aba2de3d..42aa3ffc 100755 --- a/clean-all.sh +++ b/clean-all.sh @@ -3,8 +3,8 @@ # imake clean and rm all the targetted executables. # tips to users. -rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen > /dev/null +rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 > /dev/null -rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null +rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42 cpuminer-ssse3 > /dev/null make distclean > /dev/null diff --git a/configure b/configure index d427b666..6bb6cb34 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.0. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.14.3' -PACKAGE_STRING='cpuminer-opt 3.14.3' +PACKAGE_VERSION='3.15.0' +PACKAGE_STRING='cpuminer-opt 3.15.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.15.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.15.0:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.14.3 +cpuminer-opt configure 3.15.0 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.14.3, which was +It was created by cpuminer-opt $as_me 3.15.0, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.14.3' + VERSION='3.15.0' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.14.3, which was +This file was extended by cpuminer-opt $as_me 3.15.0, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.14.3 +cpuminer-opt config.status 3.15.0 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 9654d17f..69a0f573 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.14.3]) +AC_INIT([cpuminer-opt], [3.15.0]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 0cad187a..0b80c530 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -2330,8 +2330,8 @@ static void *miner_thread( void *userdata ) // If unsubmiited nonce(s) found, submit now. if ( unlikely( nonce_found && !opt_benchmark ) ) { - applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.", - algo_names[ opt_algo ] ); +// applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.", +// algo_names[ opt_algo ] ); if ( !submit_work( mythr, &work ) ) { applog( LOG_WARNING, "Failed to submit share." ); @@ -2363,14 +2363,14 @@ static void *miner_thread( void *userdata ) prev_hi_temp = hi_temp; curr_temp = cpu_temp(0); - timeval_subtract( &diff, &tv_end, &cpu_temp_time ); if ( curr_temp > hi_temp ) hi_temp = curr_temp; pthread_mutex_unlock( &stats_lock ); if ( !opt_quiet || ( curr_temp >= 80 ) ) { - int wait_time = curr_temp >= 80 ? 30 : curr_temp >= 70 ? 60 : 120; + int wait_time = curr_temp >= 80 ? 20 : curr_temp >= 70 ? 
60 : 120; + timeval_subtract( &diff, &tv_end, &cpu_temp_time ); if ( ( diff.tv_sec > wait_time ) || ( curr_temp > prev_hi_temp ) ) { char tempstr[32]; @@ -2747,7 +2747,10 @@ static void *stratum_thread(void *userdata ) sleep(opt_fail_pause); } else + { + restart_threads(); applog(LOG_BLUE,"Stratum connection established" ); + } } report_summary_log( ( stratum_diff != stratum.job.diff ) diff --git a/sysinfos.c b/sysinfos.c index 17aa69e4..c010a9af 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -1,4 +1,4 @@ -#if !defined(SYSINJFOS_C___) +#if !defined(SYSINFOS_C__) #define SYSINFOS_C__ /** diff --git a/util.c b/util.c index 14c7286a..0eee4282 100644 --- a/util.c +++ b/util.c @@ -1096,9 +1096,10 @@ bool fulltest( const uint32_t *hash, const uint32_t *target ) // increases the effective precision. Due to the floating nature of the // decimal point leading zeros aren't counted. // -// Unfortunately I can't get float128 to work so long double it is. +// Unfortunately I can't get float128 to work so long double (float80) is +// as precise as it gets. // All calculations will be done using long double then converted to double. -// This prevent introducing significant new error while taking advantage +// This prevents introducing significant new error while taking advantage // of HW rounding. #if defined(GCC_INT128) @@ -1107,7 +1108,8 @@ void diff_to_hash( uint32_t *target, const double diff ) { uint128_t *targ = (uint128_t*)target; register long double m = 1. / diff; - targ[0] = 0; +// targ[0] = 0; + targ[0] = -1; targ[1] = (uint128_t)( m * exp96 ); } @@ -1135,7 +1137,8 @@ void diff_to_hash( uint32_t *target, const double diff ) { uint64_t *targ = (uint64_t*)target; register long double m = ( 1. / diff ) * exp32; - targ[1] = targ[0] = 0; +// targ[1] = targ[0] = 0; + targ[1] = targ[0] = -1; targ[3] = (uint64_t)m; targ[2] = (uint64_t)( ( m - (long double)targ[3] ) * exp64 ); }