From 26b94295890094e5e2f2d5b38cfeeff35dc403fe Mon Sep 17 00:00:00 2001 From: Jay D Dee Date: Sat, 11 Nov 2023 16:48:57 -0500 Subject: [PATCH] v23.8 --- Makefile.am | 19 +- RELEASE_NOTES | 7 + algo-gate-api.c | 1 - algo/echo/aes_ni/hash.c | 186 +- algo/groestl/aes_ni/groestl-intr-aes.h | 7 +- algo/hamsi/hamsi-hash-4way.c | 13 +- algo/hodl/aes.c | 183 - algo/hodl/hodl-endian.h | 75 - algo/hodl/hodl-gate.c | 185 - algo/hodl/hodl-gate.h | 6 - algo/hodl/hodl-wolf.c | 225 -- algo/hodl/hodl-wolf.h | 27 - algo/hodl/hodlminer.1 | 208 -- algo/hodl/sha512-avx.h | 50 - algo/hodl/sha512_avx.c | 235 -- algo/hodl/sha512_avx2.c | 241 -- algo/hodl/wolf-aes.h | 25 - algo/m7m/m7m.c | 10 +- algo/sha/sha1-hash.c | 390 +++ algo/sha/sha1-hash.h | 40 + algo/sha/sha1.c | 400 +++ algo/sha/sph_sha1.h | 133 + algo/shavite/sph-shavite-aesni.c | 145 +- algo/simd/simd-hash-2way.c | 352 +- algo/x17/x17-4way.c | 4 +- api.c | 13 +- armbuild-all.sh | 1 - asm/scrypt-arm.S | 1186 ------- asm/scrypt-x64.S | 2907 ---------------- asm/scrypt-x86.S | 830 ----- asm/sha2-arm.S | 1583 --------- asm/sha2-x64.S | 3661 -------------------- asm/sha2-x86.S | 1193 ------- configure | 20 +- configure.ac | 2 +- configure~ | 4405 ++++++++++++++---------- cpu-miner.c | 26 +- miner.h | 36 +- simd-utils/simd-128.h | 189 +- simd-utils/simd-256.h | 125 +- simd-utils/simd-int.h | 5 +- simd-utils/simd-neon.h | 33 +- sysinfos.c | 87 +- winbuild-cross.sh | 8 +- 44 files changed, 4152 insertions(+), 15325 deletions(-) delete mode 100644 algo/hodl/aes.c delete mode 100644 algo/hodl/hodl-endian.h delete mode 100644 algo/hodl/hodl-gate.c delete mode 100644 algo/hodl/hodl-gate.h delete mode 100644 algo/hodl/hodl-wolf.c delete mode 100644 algo/hodl/hodl-wolf.h delete mode 100644 algo/hodl/hodlminer.1 delete mode 100644 algo/hodl/sha512-avx.h delete mode 100644 algo/hodl/sha512_avx.c delete mode 100644 algo/hodl/sha512_avx2.c delete mode 100644 algo/hodl/wolf-aes.h create mode 100644 algo/sha/sha1-hash.c create mode 100644 
algo/sha/sha1-hash.h create mode 100644 algo/sha/sha1.c create mode 100644 algo/sha/sph_sha1.h delete mode 100644 asm/scrypt-arm.S delete mode 100644 asm/scrypt-x64.S delete mode 100644 asm/scrypt-x86.S delete mode 100644 asm/sha2-arm.S delete mode 100644 asm/sha2-x64.S delete mode 100644 asm/sha2-x86.S diff --git a/Makefile.am b/Makefile.am index bbe55c50..90d737f5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,11 +79,6 @@ cpuminer_SOURCES = \ algo/hamsi/hamsi-hash-4way.c \ algo/haval/haval.c \ algo/haval/haval-hash-4way.c \ - algo/hodl/aes.c \ - algo/hodl/hodl-gate.c \ - algo/hodl/hodl-wolf.c \ - algo/hodl/sha512_avx.c \ - algo/hodl/sha512_avx2.c \ algo/jh/sph_jh.c \ algo/jh/jh-hash-4way.c \ algo/jh/jha-gate.c \ @@ -148,6 +143,8 @@ cpuminer_SOURCES = \ algo/scrypt/scrypt.c \ algo/scrypt/scrypt-core-4way.c \ algo/scrypt/neoscrypt.c \ + algo/sha/sha1.c \ + algo/sha/sha1-hash.c \ algo/sha/sha256-hash.c \ algo/sha/sph_sha2.c \ algo/sha/sph_sha2big.c \ @@ -278,20 +275,10 @@ cpuminer_SOURCES = \ algo/yespower/yespower-ref.c \ algo/yespower/yespower-blake2b-ref.c - disable_flags = if USE_ASM cpuminer_SOURCES += asm/neoscrypt_asm.S -if ARCH_x86 - cpuminer_SOURCES += asm/sha2-x86.S asm/scrypt-x86.S -endif -if ARCH_x86_64 - cpuminer_SOURCES += asm/sha2-x64.S asm/scrypt-x64.S -endif -if ARCH_ARM - cpuminer_SOURCES += asm/sha2-arm.S asm/scrypt-arm.S -endif else disable_flags += -DNOASM endif @@ -301,7 +288,7 @@ if HAVE_WINDOWS endif cpuminer_LDFLAGS = @LDFLAGS@ -cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lssl -lcrypto -lgmp +cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES) cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 813bdd40..a553a3f5 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -73,6 +73,13 @@ If not what makes it happen or not happen? 
Change Log ---------- +v23.8 + +Cpuminer-opt is no longer dependant on OpenSSL. +Removed Hodl algo. +Removed legacy Sha256 & Scrypt ASM code. +ARM: Echo AES is working and enabled for x17. + v23.7 Fixed blakes2s, broken in v3.23.4. diff --git a/algo-gate-api.c b/algo-gate-api.c index 4f04f362..1886e36c 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -310,7 +310,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_GROESTL: rc = register_groestl_algo ( gate ); break; case ALGO_HEX: rc = register_hex_algo ( gate ); break; case ALGO_HMQ1725: rc = register_hmq1725_algo ( gate ); break; - case ALGO_HODL: rc = register_hodl_algo ( gate ); break; case ALGO_JHA: rc = register_jha_algo ( gate ); break; case ALGO_KECCAK: rc = register_keccak_algo ( gate ); break; case ALGO_KECCAKC: rc = register_keccakc_algo ( gate ); break; diff --git a/algo/echo/aes_ni/hash.c b/algo/echo/aes_ni/hash.c index 7968a7f9..057dedff 100644 --- a/algo/echo/aes_ni/hash.c +++ b/algo/echo/aes_ni/hash.c @@ -21,112 +21,92 @@ #include "hash_api.h" #include "simd-utils.h" -MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F}; -MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC}; -MYALIGN const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1}; -MYALIGN const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C}; -MYALIGN const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1}; -MYALIGN const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8}; -MYALIGN const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09}; -MYALIGN const 
unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79}; -MYALIGN const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8}; -MYALIGN const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170}; -MYALIGN const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1}; -MYALIGN const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363}; -MYALIGN const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6}; -MYALIGN const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b}; -MYALIGN const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e}; -MYALIGN const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e}; -MYALIGN const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515}; -MYALIGN const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c}; -MYALIGN const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601}; -MYALIGN const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06}; -MYALIGN const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b}; - - -MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000}; -MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000}; -MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101}; -MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c}; -MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; -MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 
0x2a8c71d7, 0x1eb845e3, 0xc96f9234}; - - -#define ECHO_SUBBYTES4(state, j) \ - state[0][j] = v128_aesenc(state[0][j], k1);\ - k1 = v128_add32(k1, cast_v128(const1));\ - state[1][j] = v128_aesenc(state[1][j], k1);\ - k1 = v128_add32(k1, cast_v128(const1));\ - state[2][j] = v128_aesenc(state[2][j], k1);\ - k1 = v128_add32(k1, cast_v128(const1));\ - state[3][j] = v128_aesenc(state[3][j], k1);\ - k1 = v128_add32(k1, cast_v128(const1));\ - state[0][j] = v128_aesenc(state[0][j], v128_zero ); \ - state[1][j] = v128_aesenc(state[1][j], v128_zero ); \ - state[2][j] = v128_aesenc(state[2][j], v128_zero ); \ - state[3][j] = v128_aesenc(state[3][j], v128_zero ) - -#define ECHO_SUBBYTES(state, i, j) \ - state[i][j] = v128_aesenc(state[i][j], k1);\ - k1 = v128_add32(k1, cast_v128(const1));\ - state[i][j] = v128_aesenc(state[i][j], cast_v128(zero)) - -#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \ - s2 = v128_add8(state1[0][j], state1[0][j]);\ - t1 = v128_sr16(state1[0][j], 7);\ - t1 = v128_and(t1, cast_v128(lsbmask));\ - t2 = v128_shuffle8(cast_v128(mul2mask), t1);\ - s2 = v128_xor(s2, t2);\ - state2[0][j] = s2;\ - state2[1][j] = state1[0][j];\ - state2[2][j] = state1[0][j];\ - state2[3][j] = v128_xor(s2, state1[0][j]);\ - s2 = v128_add8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\ - t1 = v128_sr16(state1[1][(j + 1) & 3], 7);\ - t1 = v128_and(t1, cast_v128(lsbmask));\ - t2 = v128_shuffle8(cast_v128(mul2mask), t1);\ - s2 = v128_xor(s2, t2);\ - state2[0][j] = v128_xor3(state2[0][j], s2, state1[1][(j + 1) & 3] );\ - state2[1][j] = v128_xor(state2[1][j], s2);\ - state2[2][j] = v128_xor(state2[2][j], state1[1][(j + 1) & 3]);\ - state2[3][j] = v128_xor(state2[3][j], state1[1][(j + 1) & 3]);\ - s2 = v128_add8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\ - t1 = v128_sr16(state1[2][(j + 2) & 3], 7);\ - t1 = v128_and(t1, cast_v128(lsbmask));\ - t2 = v128_shuffle8(cast_v128(mul2mask), t1);\ - s2 = v128_xor(s2, t2);\ - state2[0][j] = v128_xor(state2[0][j], state1[2][(j + 
2) & 3]);\ - state2[1][j] = v128_xor3(state2[1][j], s2, state1[2][(j + 2) & 3] );\ - state2[2][j] = v128_xor(state2[2][j], s2);\ - state2[3][j] = v128_xor(state2[3][j], state1[2][(j + 2) & 3]);\ - s2 = v128_add8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\ - t1 = v128_sr16(state1[3][(j + 3) & 3], 7);\ - t1 = v128_and(t1, cast_v128(lsbmask));\ - t2 = v128_shuffle8(cast_v128(mul2mask), t1);\ - s2 = v128_xor(s2, t2);\ - state2[0][j] = v128_xor(state2[0][j], state1[3][(j + 3) & 3]);\ - state2[1][j] = v128_xor(state2[1][j], state1[3][(j + 3) & 3]);\ - state2[2][j] = v128_xor3(state2[2][j], s2, state1[3][(j + 3) & 3] );\ - state2[3][j] = v128_xor(state2[3][j], s2) +const uint32_t const1[] __attribute__ ((aligned (32))) = + { 0x00000001, 0x00000000, 0x00000000, 0x00000000 }; +const uint32_t mul2mask[] __attribute__ ((aligned (16))) = + { 0x00001b00, 0x00000000, 0x00000000, 0x00000000 }; +const uint32_t lsbmask[] __attribute__ ((aligned (16))) = + { 0x01010101, 0x01010101, 0x01010101, 0x01010101 }; +const uint32_t invshiftrows[] __attribute__ ((aligned (16))) = + { 0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c }; + +#define ECHO_SUBBYTES4( state, j ) \ + state[0][j] = v128_aesenc( state[0][j], k1 ); \ + k1 = v128_add32( k1, cast_v128(const1) ); \ + state[1][j] = v128_aesenc( state[1][j], k1 ); \ + k1 = v128_add32( k1, cast_v128(const1) ); \ + state[2][j] = v128_aesenc( state[2][j], k1 ); \ + k1 = v128_add32( k1, cast_v128(const1) ); \ + state[3][j] = v128_aesenc( state[3][j], k1 ); \ + k1 = v128_add32( k1, cast_v128(const1) ); \ + state[0][j] = v128_aesenc_nokey( state[0][j] ); \ + state[1][j] = v128_aesenc_nokey( state[1][j] ); \ + state[2][j] = v128_aesenc_nokey( state[2][j] ); \ + state[3][j] = v128_aesenc_nokey( state[3][j] ) + +#define ECHO_SUBBYTES( state, i, j ) \ + state[i][j] = v128_aesenc( state[i][j], k1 ); \ + k1 = v128_add32( k1, cast_v128(const1) ); \ + state[i][j] = v128_aesenc_nokey( state[i][j] ) + +#define ECHO_MIXBYTES( state1, state2, j, t1, t2, 
s2 ) \ + s2 = v128_add8( state1[0][j], state1[0][j] ); \ + t1 = v128_sr16( state1[0][j], 7 ); \ + t1 = v128_and( t1, cast_v128(lsbmask) ); \ + t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \ + s2 = v128_xor( s2, t2 ); \ + state2[0][j] = s2; \ + state2[1][j] = state1[0][j]; \ + state2[2][j] = state1[0][j]; \ + state2[3][j] = v128_xor(s2, state1[0][j] ); \ + s2 = v128_add8( state1[1][(j + 1) & 3], state1[1][(j + 1) & 3] ); \ + t1 = v128_sr16( state1[1][(j + 1) & 3], 7 ); \ + t1 = v128_and( t1, cast_v128(lsbmask) ); \ + t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \ + s2 = v128_xor( s2, t2 ); \ + state2[0][j] = v128_xor3( state2[0][j], s2, state1[1][(j + 1) & 3] );\ + state2[1][j] = v128_xor( state2[1][j], s2 ); \ + state2[2][j] = v128_xor( state2[2][j], state1[1][(j + 1) & 3] ); \ + state2[3][j] = v128_xor( state2[3][j], state1[1][(j + 1) & 3] ); \ + s2 = v128_add8( state1[2][(j + 2) & 3], state1[2][(j + 2) & 3] ); \ + t1 = v128_sr16( state1[2][(j + 2) & 3], 7 ); \ + t1 = v128_and( t1, cast_v128(lsbmask) ); \ + t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \ + s2 = v128_xor( s2, t2 ); \ + state2[0][j] = v128_xor( state2[0][j], state1[2][(j + 2) & 3] ); \ + state2[1][j] = v128_xor3( state2[1][j], s2, state1[2][(j + 2) & 3] ); \ + state2[2][j] = v128_xor( state2[2][j], s2 ); \ + state2[3][j] = v128_xor( state2[3][j], state1[2][(j + 2) & 3] ); \ + s2 = v128_add8( state1[3][(j + 3) & 3], state1[3][(j + 3) & 3] ); \ + t1 = v128_sr16( state1[3][(j + 3) & 3], 7 ); \ + t1 = v128_and( t1, cast_v128(lsbmask) ); \ + t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \ + s2 = v128_xor( s2, t2 ); \ + state2[0][j] = v128_xor( state2[0][j], state1[3][(j + 3) & 3] ); \ + state2[1][j] = v128_xor( state2[1][j], state1[3][(j + 3) & 3] ); \ + state2[2][j] = v128_xor3( state2[2][j], s2, state1[3][(j + 3) & 3] ); \ + state2[3][j] = v128_xor( state2[3][j], s2 ) #define ECHO_ROUND_UNROLL2 \ - ECHO_SUBBYTES4(_state, 0);\ - ECHO_SUBBYTES4(_state, 1);\ - ECHO_SUBBYTES4(_state, 2);\ - 
ECHO_SUBBYTES4(_state, 3);\ - ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\ - ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\ - ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\ - ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\ - ECHO_SUBBYTES4(_state2, 0);\ - ECHO_SUBBYTES4(_state2, 1);\ - ECHO_SUBBYTES4(_state2, 2);\ - ECHO_SUBBYTES4(_state2, 3);\ - ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\ - ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\ - ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\ - ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2) +{ \ + ECHO_SUBBYTES4( _state, 0 ); \ + ECHO_SUBBYTES4( _state, 1 ); \ + ECHO_SUBBYTES4( _state, 2 ); \ + ECHO_SUBBYTES4( _state, 3 ); \ + ECHO_MIXBYTES( _state, _state2, 0, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state, _state2, 1, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state, _state2, 2, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state, _state2, 3, t1, t2, s2 ); \ + ECHO_SUBBYTES4( _state2, 0 ); \ + ECHO_SUBBYTES4( _state2, 1 ); \ + ECHO_SUBBYTES4( _state2, 2 ); \ + ECHO_SUBBYTES4( _state2, 3 ); \ + ECHO_MIXBYTES( _state2, _state, 0, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state2, _state, 1, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state2, _state, 2, t1, t2, s2 ); \ + ECHO_MIXBYTES( _state2, _state, 3, t1, t2, s2 ); \ +} /* #define ECHO_ROUND_UNROLL2 \ diff --git a/algo/groestl/aes_ni/groestl-intr-aes.h b/algo/groestl/aes_ni/groestl-intr-aes.h index 3c42f178..ceb69ce8 100644 --- a/algo/groestl/aes_ni/groestl-intr-aes.h +++ b/algo/groestl/aes_ni/groestl-intr-aes.h @@ -61,9 +61,12 @@ static const v128u64_t SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 }; #if defined(__ARM_NEON) // No fast shuffle on NEON -static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 }; +//static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 }; +static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff }; -#define gr_shuffle32( v ) v128_shufflev32( v, vmask_d8 ) +#define gr_shuffle32( v ) v128_blendv( v128_qrev32( v ), v, BLEND_MASK ) + +//#define gr_shuffle32( v ) 
v128_shufflev32( v, vmask_d8 ) #else diff --git a/algo/hamsi/hamsi-hash-4way.c b/algo/hamsi/hamsi-hash-4way.c index 15c2a049..b14b1281 100644 --- a/algo/hamsi/hamsi-hash-4way.c +++ b/algo/hamsi/hamsi-hash-4way.c @@ -35,7 +35,7 @@ #include #include "hamsi-hash-4way.h" -static const uint32_t HAMSI_IV512[] = +static const uint32_t HAMSI_IV512[] __attribute__ ((aligned (32))) = { 0x73746565, 0x6c706172, 0x6b204172, 0x656e6265, 0x72672031, 0x302c2062, 0x75732032, 0x3434362c, @@ -43,7 +43,8 @@ static const uint32_t HAMSI_IV512[] = 0x65766572, 0x6c65652c, 0x2042656c, 0x6769756d }; -static const uint32_t alpha_n[] = { +static const uint32_t alpha_n[] __attribute__ ((aligned (32))) = +{ 0xff00f0f0, 0xccccaaaa, 0xf0f0cccc, 0xff00aaaa, 0xccccaaaa, 0xf0f0ff00, 0xaaaacccc, 0xf0f0ff00, 0xf0f0cccc, 0xaaaaff00, 0xccccff00, 0xaaaaf0f0, @@ -54,7 +55,8 @@ static const uint32_t alpha_n[] = { 0xff00cccc, 0xaaaaf0f0, 0xff00aaaa, 0xccccf0f0 }; -static const uint32_t alpha_f[] = { +static const uint32_t alpha_f[] __attribute__ ((aligned (32))) = +{ 0xcaf9639c, 0x0ff0f9c0, 0x639c0ff0, 0xcaf9f9c0, 0x0ff0f9c0, 0x639ccaf9, 0xf9c00ff0, 0x639ccaf9, 0x639c0ff0, 0xf9c0caf9, 0x0ff0caf9, 0xf9c0639c, @@ -69,7 +71,8 @@ static const uint32_t alpha_f[] = { /* Note: this table lists bits within each byte from least siginificant to most significant. 
*/ -static const uint32_t T512[64][16] = { +static const uint32_t T512[64][16] __attribute__ ((aligned (32))) = +{ { 0xef0b0270, 0x3afd0000, 0x5dae0000, 0x69490000, 0x9b0f3c06, 0x4405b5f9, 0x66140a51, 0x924f5d0a, 0xc96b0030, 0xe7250000, 0x2f840000, 0x264f0000, @@ -2260,4 +2263,4 @@ void hamsi512_2x64( void *dst, const void *data, size_t len ) hamsi512_2x64_close( &sc, dst ); } -#endif // SSE4.1 or NEON +#endif // SSE4.2 or NEON diff --git a/algo/hodl/aes.c b/algo/hodl/aes.c deleted file mode 100644 index 380adfd9..00000000 --- a/algo/hodl/aes.c +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include "miner.h" - -#if defined(__AES__) - -#include -#include "wolf-aes.h" - -static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2) -{ - __m128i tmp4; - *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF); - tmp4 = _mm_slli_si128(*tmp1, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - *tmp1 = _mm_xor_si128(*tmp1, *tmp2); -} - -static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3) -{ - __m128i tmp2, tmp4; - - tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00); - tmp2 = _mm_shuffle_epi32(tmp4, 0xAA); - tmp4 = _mm_slli_si128(*tmp3, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - *tmp3 = _mm_xor_si128(*tmp3, tmp2); -} - -// Special thanks to Intel for helping me -// with ExpandAESKey256() and its subroutines -void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf) -{ - __m128i tmp1, tmp2, tmp3; - - tmp1 = keys[0] = KeyBuf[0]; - tmp3 = keys[1] = KeyBuf[1]; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[2] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[3] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02); - 
ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[4] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[5] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[6] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[7] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[8] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[9] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[10] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[11] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[12] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[13] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[14] = tmp1; -} - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -#define AESENC(i,j) \ - State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]); - -#define AESENC_N(i) \ - AESENC(i,0) \ - AESENC(i,1) \ - AESENC(i,2) \ - AESENC(i,3) \ - AESENC(i,4) \ - AESENC(i,5) \ - AESENC(i,6) \ - AESENC(i,7) \ - - -static inline void AES256Core(__m128i* State, __m128i ExpandedKey[][16]) -{ - const uint32_t N = AES_PARALLEL_N; - - for(int j=0; j> 8) & 0xff) | (((x) & 0xff) << 8))) - -static __inline unsigned short int -__bswap_16 (unsigned short int __bsx) -{ - return __bswap_constant_16 (__bsx); -} - -// LE -# define htobe16(x) __bswap_16 (x) -# define htole16(x) (x) -# define be16toh(x) __bswap_16 (x) -# define le16toh(x) (x) - -// BE -//# define htole16(x) __bswap_16 (x) -//# define htobe16(x) (x) -//# define le16toh(x) __bswap_16 (x) -//# define be16toh(x) (x) - -#define __bswap_constant_32(x) \ - ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) - -static __inline unsigned int -__bswap_32 (unsigned int __bsx) -{ - return __builtin_bswap32 (__bsx); -} - -// LE -# 
define htobe32(x) __bswap_32 (x) -# define htole32(x) (x) -# define be32toh(x) __bswap_32 (x) -# define le32toh(x) (x) - -// BE -//# define htole32(x) __bswap_32 (x) -//# define htobe32(x) (x) -//# define le32toh(x) __bswap_32 (x) -//# define be32toh(x) (x) - -# define __bswap_constant_64(x) \ - ((((x) & 0xff00000000000000ull) >> 56) \ - | (((x) & 0x00ff000000000000ull) >> 40) \ - | (((x) & 0x0000ff0000000000ull) >> 24) \ - | (((x) & 0x000000ff00000000ull) >> 8) \ - | (((x) & 0x00000000ff000000ull) << 8) \ - | (((x) & 0x0000000000ff0000ull) << 24) \ - | (((x) & 0x000000000000ff00ull) << 40) \ - | (((x) & 0x00000000000000ffull) << 56)) - -static __inline uint64_t -__bswap_64 (uint64_t __bsx) -{ - return __bswap_constant_64 (__bsx); -} - -// LE -# define htobe64(x) __bswap_64 (x) -# define htole64(x) (x) -# define be64toh(x) __bswap_64 (x) -# define le64toh(x) (x) - -// BE -//# define htole64(x) __bswap_64 (x) -//# define htobe64(x) (x) -//# define le64toh(x) __bswap_64 (x) -//# define be64toh(x) (x) - -#endif \ No newline at end of file diff --git a/algo/hodl/hodl-gate.c b/algo/hodl/hodl-gate.c deleted file mode 100644 index 914d5fde..00000000 --- a/algo/hodl/hodl-gate.c +++ /dev/null @@ -1,185 +0,0 @@ -#include -//#include -#include - -#include "hodl-gate.h" -#include "hodl-wolf.h" - -#define HODL_NSTARTLOC_INDEX 20 -#define HODL_NFINALCALC_INDEX 21 - -static struct work hodl_work; - -pthread_barrier_t hodl_barrier; - -// All references to this buffer are local to this file, so no args -// need to be passed. 
-unsigned char *hodl_scratchbuf = NULL; - -void hodl_le_build_stratum_request( char* req, struct work* work, - struct stratum_ctx *sctx ) -{ - uint32_t ntime, nonce, nstartloc, nfinalcalc; - char ntimestr[9], noncestr[9], nstartlocstr[9], nfinalcalcstr[9]; - unsigned char *xnonce2str; - - le32enc( &ntime, work->data[ algo_gate.ntime_index ] ); - le32enc( &nonce, work->data[ algo_gate.nonce_index ] ); - bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) ); - bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) ); - xnonce2str = abin2hex(work->xnonce2, work->xnonce2_len ); - le32enc( &nstartloc, work->data[ HODL_NSTARTLOC_INDEX ] ); - le32enc( &nfinalcalc, work->data[ HODL_NFINALCALC_INDEX ] ); - bin2hex( nstartlocstr, (char*)(&nstartloc), sizeof(uint32_t) ); - bin2hex( nfinalcalcstr, (char*)(&nfinalcalc), sizeof(uint32_t) ); - sprintf( req, "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", - rpc_user, work->job_id, xnonce2str, ntimestr, noncestr, - nstartlocstr, nfinalcalcstr ); - free( xnonce2str ); -} - -char* hodl_malloc_txs_request( struct work *work ) -{ - char* req; - json_t *val; - char data_str[2 * sizeof(work->data) + 1]; - int i; - - for ( i = 0; i < ARRAY_SIZE(work->data); i++ ) - be32enc( work->data + i, work->data[i] ); - - bin2hex( data_str, (unsigned char *)work->data, 88 ); - if ( work->workid ) - { - char *params; - val = json_object(); - json_object_set_new( val, "workid", json_string( work->workid ) ); - params = json_dumps( val, 0 ); - json_decref( val ); - req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) ); - sprintf( req, - "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n", - data_str, work->txs, params); - free( params ); - } - else - { - req = malloc( 128 + 2*88 + strlen(work->txs)); - sprintf( req, - "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n", - data_str, work->txs); - } - return req; -} - -void 
hodl_build_block_header( struct work* g_work, uint32_t version, - uint32_t *prevhash, uint32_t *merkle_tree, - uint32_t ntime, uint32_t nbits ) -{ - int i; - - memset( g_work->data, 0, sizeof(g_work->data) ); - g_work->data[0] = version; - - if ( have_stratum ) - for ( i = 0; i < 8; i++ ) - g_work->data[ 1+i ] = le32dec( prevhash + i ); - else - for (i = 0; i < 8; i++) - g_work->data[ 8-i ] = le32dec( prevhash + i ); - - for ( i = 0; i < 8; i++ ) - g_work->data[ 9+i ] = be32dec( merkle_tree + i ); - - g_work->data[ algo_gate.ntime_index ] = ntime; - g_work->data[ algo_gate.nbits_index ] = nbits; - g_work->data[22] = 0x80000000; - g_work->data[31] = 0x00000280; -} - -// called only by thread 0, saves a backup of g_work -void hodl_get_new_work( struct work* work, struct work* g_work) -{ -// pthread_rwlock_rdlock( &g_work_lock ); - - work_free( &hodl_work ); - work_copy( &hodl_work, g_work ); - hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999; - -// pthread_rwlock_unlock( &g_work_lock ); -} - -json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url ) -{ - json_t *val; - char *req = NULL; - - if ( have_gbt ) - { - req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 ); - sprintf( req, gbt_lp_req, lp_id ); - } - val = json_rpc_call( curl, lp_url, rpc_userpass, - req ? req : getwork_req, err, JSON_RPC_LONGPOLL ); - free( req ); - return val; -} - -// called by every thread, copies the backup to each thread's work. 
-void hodl_resync_threads( int thr_id, struct work* work ) -{ - int nonce_index = algo_gate.nonce_index; - pthread_barrier_wait( &hodl_barrier ); - if ( memcmp( work->data, hodl_work.data, algo_gate.work_cmp_size ) ) - { - work_free( work ); - work_copy( work, &hodl_work ); - } - work->data[ nonce_index ] = swab32( hodl_work.data[ nonce_index ] ); - work_restart[thr_id].restart = 0; -} - -bool hodl_do_this_thread( int thr_id ) -{ - return ( thr_id == 0 ); -} - -int hodl_scanhash( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ -#if defined(__AES__) - GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id ); - pthread_barrier_wait( &hodl_barrier ); - return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr ); -#endif - return false; -} - -bool register_hodl_algo( algo_gate_t* gate ) -{ -#if !defined(__AES__) - applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version."); - return false; -#endif - - if ( GARBAGE_SIZE % opt_n_threads ) - applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" 
); - - pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads ); - gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT; - gate->scanhash = (void*)&hodl_scanhash; - gate->get_new_work = (void*)&hodl_get_new_work; - gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call; - gate->build_stratum_request = (void*)&hodl_le_build_stratum_request; - gate->malloc_txs_request = (void*)&hodl_malloc_txs_request; - gate->build_block_header = (void*)&hodl_build_block_header; - gate->resync_threads = (void*)&hodl_resync_threads; - gate->do_this_thread = (void*)&hodl_do_this_thread; - gate->work_cmp_size = 76; - hodl_scratchbuf = (unsigned char*)mm_malloc( 1 << 30, 64 ); - allow_getwork = false; - opt_target_factor = 8388608.0; - return ( hodl_scratchbuf != NULL ); -} - - diff --git a/algo/hodl/hodl-gate.h b/algo/hodl/hodl-gate.h deleted file mode 100644 index 9a8ecf75..00000000 --- a/algo/hodl/hodl-gate.h +++ /dev/null @@ -1,6 +0,0 @@ -#include "algo-gate-api.h" - -extern unsigned char *hodl_scratchbuf; - -bool register_hodl_algo ( algo_gate_t* gate ); - diff --git a/algo/hodl/hodl-wolf.c b/algo/hodl/hodl-wolf.c deleted file mode 100644 index ea3c7776..00000000 --- a/algo/hodl/hodl-wolf.c +++ /dev/null @@ -1,225 +0,0 @@ -#include -#include -#include -#include "simd-utils.h" -#include "sha512-avx.h" -#include "wolf-aes.h" -#include "hodl-gate.h" -#include "hodl-wolf.h" -#include "miner.h" -#include "algo/sha/sha256d.h" - -#if defined(__AES__) - -void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount, - void *MidHash ) -{ - const int Chunk = TOTAL_CHUNKS / ThreadCount; - const uint32_t StartChunk = ThreadID * Chunk; - const uint32_t EndChunk = StartChunk + Chunk; - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - uint64_t* TempBufs[ SHA512_PARALLEL_N ] ; - uint64_t* desination[ SHA512_PARALLEL_N ]; - - for ( int i=0; i < SHA512_PARALLEL_N; ++i ) - { - TempBufs[i] = (uint64_t*)malloc( 32 ); - memcpy( TempBufs[i], MidHash, 32 ); - } - - for ( uint32_t i = 
StartChunk; i < EndChunk; i += SHA512_PARALLEL_N ) - { - for ( int j = 0; j < SHA512_PARALLEL_N; ++j ) - { - ( (uint32_t*)TempBufs[j] )[0] = i + j; - desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j) - * GARBAGE_CHUNK_SIZE ) ); - } - sha512Compute32b_parallel( TempBufs, desination ); - } - - for ( int i = 0; i < SHA512_PARALLEL_N; ++i ) - free( TempBufs[i] ); -#else - uint32_t TempBuf[8]; - memcpy( TempBuf, MidHash, 32 ); - - for ( uint32_t i = StartChunk; i < EndChunk; ++i ) - { - TempBuf[0] = i; - SHA512( ( uint8_t *)TempBuf, 32, - ( (uint8_t *)Garbage ) + ( i * GARBAGE_CHUNK_SIZE ) ); - } -#endif -} - -/* -void Rev256(uint32_t *Dest, const uint32_t *Src) -{ - for(int i = 0; i < 8; ++i) Dest[i] = swab32(Src[i]); -} -*/ - -int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ -#if defined(__SSE4_2__) -//#ifdef __AVX__ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int threadNumber = mythr->id; - CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; - CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64))); - __m128i* data[AES_PARALLEL_N]; - const __m128i* next[AES_PARALLEL_N]; - uint32_t CollisionCount = 0; - - for ( int n=0; n> 2) - 1] & (COMPARE_SIZE - 1); //% COMPARE_SIZE; - next[n] = Garbage[nextLocation].dqwords; - - __m128i last[2]; - last[0] = _mm_xor_si128(Cache[n].dqwords[254], next[n][254]); - last[1] = _mm_xor_si128(Cache[n].dqwords[255], next[n][255]); - - // Key is last 32b of Cache - // IV is last 16b of Cache - ExpandAESKey256(ExpKey[n], last); - ivs[n] = last[1]; - } - AES256CBC(data, next, ExpKey, ivs); - } - - for(int n=0; n> 2) - 1] & (COMPARE_SIZE - 1)) < 1000) - { - uint32_t BlockHdr[22], FinalPoW[8]; - - swab32_array( BlockHdr, pdata, 20 ); - - BlockHdr[20] = k + n; - BlockHdr[21] = Cache[n].dwords[(GARBAGE_SLICE_SIZE >> 2) - 2]; - - sha256d( (uint8_t *)FinalPoW, (uint8_t *)BlockHdr, 88 ); - CollisionCount++; - if( FinalPoW[7] <= ptarget[7] ) - 
{ - pdata[20] = swab32( BlockHdr[20] ); - pdata[21] = swab32( BlockHdr[21] ); - *hashes_done = CollisionCount; - submit_solution( work, FinalPoW, mythr ); - return(0); - } - } - } - - *hashes_done = CollisionCount; - return(0); - - -#else // no AVX - - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t BlockHdr[22], FinalPoW[8]; - CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; - CacheEntry Cache; - uint32_t CollisionCount = 0; - int threadNumber = mythr->id; - - swab32_array( BlockHdr, pdata, 20 ); - // Search for pattern in psuedorandom data - int searchNumber = COMPARE_SIZE / opt_n_threads; - int startLoc = threadNumber * searchNumber; - - if ( opt_debug ) - applog( LOG_DEBUG,"Hash target= %08lx", ptarget[7] ); - - for(int32_t k = startLoc; k < startLoc + searchNumber && !work_restart[threadNumber].restart; k++) - { - // copy data to first l2 cache - memcpy(Cache.dwords, Garbage + k, GARBAGE_SLICE_SIZE); - for(int j = 0; j < AES_ITERATIONS; j++) - { - CacheEntry TmpXOR; - __m128i ExpKey[16]; - - // use last 4 bytes of first cache as next location - uint32_t nextLocation = Cache.dwords[(GARBAGE_SLICE_SIZE >> 2) - - 1] & (COMPARE_SIZE - 1); //% COMPARE_SIZE; - - // Copy data from indicated location to second l2 cache - - memcpy(&TmpXOR, Garbage + nextLocation, GARBAGE_SLICE_SIZE); - //XOR location data into second cache - for( int i = 0; i < (GARBAGE_SLICE_SIZE >> 4); ++i ) - TmpXOR.dqwords[i] = _mm_xor_si128( Cache.dqwords[i], - TmpXOR.dqwords[i] ); - // Key is last 32b of TmpXOR - // IV is last 16b of TmpXOR - - ExpandAESKey256( ExpKey, TmpXOR.dqwords + - (GARBAGE_SLICE_SIZE / sizeof(__m128i)) - 2 ); - AES256CBC( Cache.dqwords, TmpXOR.dqwords, ExpKey, - TmpXOR.dqwords[ (GARBAGE_SLICE_SIZE / sizeof(__m128i)) - - 1 ], 256 ); } - // use last X bits as solution - if( ( Cache.dwords[ (GARBAGE_SLICE_SIZE >> 2) - 1 ] - & (COMPARE_SIZE - 1) ) < 1000 ) - { - BlockHdr[20] = k; - BlockHdr[21] = Cache.dwords[ (GARBAGE_SLICE_SIZE >> 2) - 2 ]; - 
sha256d( (uint8_t *)FinalPoW, (uint8_t *)BlockHdr, 88 ); - CollisionCount++; - if( FinalPoW[7] <= ptarget[7] ) - { - pdata[20] = swab32( BlockHdr[20] ); - pdata[21] = swab32( BlockHdr[21] ); - *hashes_done = CollisionCount; - submit_solution( work, FinalPoW, mythr ); - return(0); - } - } - } - - *hashes_done = CollisionCount; - return(0); - -#endif // AVX else - -} - -void GenRandomGarbage(CacheEntry *Garbage, uint32_t *pdata, int thr_id) -{ - uint32_t BlockHdr[20], MidHash[8]; - swab32_array( BlockHdr, pdata, 20 ); - sha256d((uint8_t *)MidHash, (uint8_t *)BlockHdr, 80); - GenerateGarbageCore(Garbage, thr_id, opt_n_threads, MidHash); -} - -#endif // AES - diff --git a/algo/hodl/hodl-wolf.h b/algo/hodl/hodl-wolf.h deleted file mode 100644 index 679d359f..00000000 --- a/algo/hodl/hodl-wolf.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __HODL_H -#define __HODL_H - -#include -#include "simd-utils.h" -#include "miner.h" - -#define AES_ITERATIONS 15 - -#define GARBAGE_SIZE (1 << 30) -#define GARBAGE_CHUNK_SIZE (1 << 6) -#define GARBAGE_SLICE_SIZE (1 << 12) -#define TOTAL_CHUNKS (1 << 24) // GARBAGE_SIZE / GARBAGE_CHUNK_SIZE -#define COMPARE_SIZE (1 << 18) // GARBAGE_SIZE / GARBAGE_SLICE_SIZE - -typedef union _CacheEntry -{ - uint32_t dwords[GARBAGE_SLICE_SIZE >> 2] __attribute__((aligned(16))); - v128_t dqwords[GARBAGE_SLICE_SIZE >> 4] __attribute__((aligned(16))); -} CacheEntry; - -int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); - -void GenRandomGarbage( CacheEntry *Garbage, uint32_t *pdata, int thr_id); - -#endif // __HODL_H diff --git a/algo/hodl/hodlminer.1 b/algo/hodl/hodlminer.1 deleted file mode 100644 index da855e61..00000000 --- a/algo/hodl/hodlminer.1 +++ /dev/null @@ -1,208 +0,0 @@ -.TH MINERD 1 "March 2016" "cpuminer 2.4.3" -.SH NAME -hodlminer \- CPU miner for Hodlcoin -.SH SYNOPSIS -.B hodlminer -[\fIOPTION\fR]... -.SH DESCRIPTION -.B hodlminer -is a multi-threaded CPU miner for Hodlcoin. 
-It supports the getwork and getblocktemplate (BIP 22) methods, -as well as the Stratum mining protocol. -.PP -In its normal mode of operation, \fBhodlminer\fR connects to a mining server -(specified with the \fB\-o\fR option), receives work from it and starts hashing. -As soon as a solution is found, it is submitted to the same mining server, -which can accept or reject it. -When using getwork or getblocktemplate, -\fBhodlminer\fR can take advantage of long polling, if the server supports it; -in any case, fresh work is fetched as needed. -When using the Stratum protocol this is not possible, -and the server is responsible for sending fresh work at least every minute; -if it fails to do so, -\fBhodlminer\fR may drop the connection and try reconnecting again. -.PP -By default, \fBhodlminer\fR writes all its messages to standard error. -On systems that have a syslog, the \fB\-\-syslog\fR option can be used -to write to it instead. -.PP -On start, the nice value of all miner threads is set to 19. -On Linux, the scheduling policy is also changed to SCHED_IDLE, -or to SCHED_BATCH if that fails. -On multiprocessor systems, \fBhodlminer\fR -automatically sets the CPU affinity of miner threads -if the number of threads is a multiple of the number of processors. -.SH EXAMPLES -To connect to the Hodlcoin mining pool that provides a Stratum server -at hodl.blockquarry.com on port 8332, authenticating as worker "user.worker" with password "x": -.PP -.nf -.RS -hodlminer \-o stratum+tcp://hodl.blockquarry.com:8332 \-u user.worker -p x -q -.RE -.fi -.PP -To mine to a local Hodlcoin instance running on port 18332, -authenticating with username "rpcuser" and password "rpcpass": -.PP -.nf -.RS -hodlminer \-a hodl \-o http://localhost:18332 \-O rpcuser:rpcpass \\ - \-\-coinbase\-addr=mpXwg4jMtRhuSpVq4xS3HFHmCmWp9NyGKt -.RE -.fi -.PP -.SH OPTIONS -.TP -\fB\-a\fR, \fB\-\-algo\fR=\fIALGORITHM\fR -Set the hashing algorithm to use. -Default is hodl. 
-Possible values are: -.RS 11 -.TP 10 -.B hodl -.TP -\fB\-\-benchmark\fR -Run in offline benchmark mode. -.TP -\fB\-B\fR, \fB\-\-background\fR -Run in the background as a daemon. -.TP -\fB\-\-cert\fR=\fIFILE\fR -Set an SSL certificate to use with the mining server. -Only supported when using the HTTPS protocol. -.TP -\fB\-\-coinbase\-addr\fR=\fIADDRESS\fR -Set a payout address for solo mining. -This is only used in getblocktemplate mode, -and only if the server does not provide a coinbase transaction. -.TP -\fB\-\-coinbase\-sig\fR=\fITEXT\fR -Set a string to be included in the coinbase (if allowed by the server). -This is only used in getblocktemplate mode. -.TP -\fB\-c\fR, \fB\-\-config\fR=\fIFILE\fR -Load options from a configuration file. -\fIFILE\fR must contain a JSON object -mapping long options to their arguments (as strings), -or to \fBtrue\fR if no argument is required. -Sample configuration file: - -.nf - { - "url": "stratum+tcp://hodl.blockquarry.com:8332", - "userpass": "foo:bar", - "retry-pause": "10", - "quiet": true - } -.fi -.TP -\fB\-D\fR, \fB\-\-debug\fR -Enable debug output. -.TP -\fB\-h\fR, \fB\-\-help\fR -Print a help message and exit. -.TP -\fB\-\-no\-gbt\fR -Do not use the getblocktemplate RPC method. -.TP -\fB\-\-no\-getwork\fR -Do not use the getwork RPC method. -.TP -\fB\-\-no\-longpoll\fR -Do not use long polling. -.TP -\fB\-\-no\-redirect\fR -Ignore requests from the server to switch to a different URL. -.TP -\fB\-\-no\-stratum\fR -Do not switch to Stratum, even if the server advertises support for it. -.TP -\fB\-o\fR, \fB\-\-url\fR=[\fISCHEME\fR://][\fIUSERNAME\fR[:\fIPASSWORD\fR]@]\fIHOST\fR:\fIPORT\fR[/\fIPATH\fR] -Set the URL of the mining server to connect to. -Supported schemes are \fBhttp\fR, \fBhttps\fR, \fBstratum+tcp\fR -and \fBstratum+tcps\fR. -If no scheme is specified, http is assumed. -Specifying a \fIPATH\fR is only supported for HTTP and HTTPS. -Specifying credentials has the same effect as using the \fB\-O\fR option. 
- -By default, on HTTP and HTTPS, -the miner tries to use the getblocktemplate RPC method, -and falls back to using getwork if getblocktemplate is unavailable. -This behavior can be modified by using the \fB\-\-no\-gbt\fR -and \fB\-\-no\-getwork\fR options. -.TP -\fB\-O\fR, \fB\-\-userpass\fR=\fIUSERNAME\fR:\fIPASSWORD\fR -Set the credentials to use for connecting to the mining server. -Any value previously set with \fB\-u\fR or \fB\-p\fR is discarded. -.TP -\fB\-p\fR, \fB\-\-pass\fR=\fIPASSWORD\fR -Set the password to use for connecting to the mining server. -Any password previously set with \fB\-O\fR is discarded. -.TP -\fB\-P\fR, \fB\-\-protocol\-dump\fR -Enable output of all protocol-level activities. -.TP -\fB\-q\fR, \fB\-\-quiet\fR -Disable per-thread hashmeter output. -.TP -\fB\-r\fR, \fB\-\-retries\fR=\fIN\fR -Set the maximum number of times to retry if a network call fails. -If not specified, the miner will retry indefinitely. -.TP -\fB\-R\fR, \fB\-\-retry\-pause\fR=\fISECONDS\fR -Set how long to wait between retries. Default is 30 seconds. -.TP -\fB\-s\fR, \fB\-\-scantime\fR=\fISECONDS\fR -Set an upper bound on the time the miner can go without fetching fresh work. -This setting has no effect in Stratum mode or when long polling is activated. -Default is 5 seconds. -.TP -\fB\-S\fR, \fB\-\-syslog\fR -Log to the syslog facility instead of standard error. -.TP -\fB\-t\fR, \fB\-\-threads\fR=\fIN\fR -Set the number of miner threads. -If not specified, the miner will try to detect the number of available processors -and use that. -.TP -\fB\-T\fR, \fB\-\-timeout\fR=\fISECONDS\fR -Set a timeout for long polling. -.TP -\fB\-u\fR, \fB\-\-user\fR=\fIUSERNAME\fR -Set the username to use for connecting to the mining server. -Any username previously set with \fB\-O\fR is discarded. -.TP -\fB\-V\fR, \fB\-\-version\fR -Display version information and quit. 
-.TP -\fB\-x\fR, \fB\-\-proxy\fR=[\fISCHEME\fR://][\fIUSERNAME\fR:\fIPASSWORD\fR@]\fIHOST\fR:\fIPORT\fR -Connect to the mining server through a proxy. -Supported schemes are: \fBhttp\fR, \fBsocks4\fR, \fBsocks5\fR. -Since libcurl 7.18.0, the following are also supported: -\fBsocks4a\fR, \fBsocks5h\fR (SOCKS5 with remote name resolving). -If no scheme is specified, the proxy is treated as an HTTP proxy. -.SH ENVIRONMENT -The following environment variables can be specified in lower case or upper case; -the lower-case version has precedence. \fBhttp_proxy\fR is an exception -as it is only available in lower case. -.PP -.RS -.TP -\fBhttp_proxy\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use for HTTP. -.TP -\fBHTTPS_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use for HTTPS. -.TP -\fBALL_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use if no protocol-specific proxy is set. -.RE -.PP -Using an environment variable to set the proxy has the same effect as -using the \fB\-x\fR option. -.SH AUTHOR -Most of the code in the current version of minerd was written by -Pooler with contributions from others. - -The original minerd was written by Jeff Garzik . 
diff --git a/algo/hodl/sha512-avx.h b/algo/hodl/sha512-avx.h deleted file mode 100644 index bbc8b3bf..00000000 --- a/algo/hodl/sha512-avx.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _SHA512_H -#define _SHA512_H - -#include -#include "simd-utils.h" - -//SHA-512 block size -#define SHA512_BLOCK_SIZE 128 -//SHA-512 digest size -#define SHA512_DIGEST_SIZE 64 - -/* -#ifndef __AVX2__ -#ifndef __AVX__ -#error "Either AVX or AVX2 supported needed" -#endif // __AVX__ -#endif // __AVX2__ -*/ - -typedef struct -{ -#ifdef __AVX2__ - __m256i h[8]; - __m256i w[80]; -#elif defined(__SSE4_2__) -//#elif defined(__AVX__) - v128_t h[8]; - v128_t w[80]; -#else - int dummy; -#endif -} Sha512Context; - -#ifdef __AVX2__ -#define SHA512_PARALLEL_N 8 -#elif defined(__SSE4_2__) -//#elif defined(__AVX__) -#define SHA512_PARALLEL_N 4 -#else -#define SHA512_PARALLEL_N 1 // dummy value -#endif - -//SHA-512 related functions -void sha512Compute32b_parallel( - uint64_t *data[SHA512_PARALLEL_N], - uint64_t *digest[SHA512_PARALLEL_N]); - -void sha512ProcessBlock(Sha512Context contexti[2] ); - -#endif diff --git a/algo/hodl/sha512_avx.c b/algo/hodl/sha512_avx.c deleted file mode 100644 index 1c7c0892..00000000 --- a/algo/hodl/sha512_avx.c +++ /dev/null @@ -1,235 +0,0 @@ -#ifndef __AVX2__ - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -//Dependencies -#include -#include - -#ifdef __FreeBSD__ -#include -#endif - -#if defined(__CYGWIN__) -#include -#endif - -#include "tmmintrin.h" -#include "smmintrin.h" - -#include "sha512-avx.h" -#if ((defined(_WIN64) || defined(__WINDOWS__))) -#include "hodl-endian.h" -#endif - -//SHA-512 auxiliary functions -#define Ch(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define SIGMA1(x) (ROR64(x, 28) ^ ROR64(x, 34) ^ ROR64(x, 39)) -#define SIGMA2(x) (ROR64(x, 14) ^ ROR64(x, 18) ^ ROR64(x, 41)) -#define SIGMA3(x) (ROR64(x, 1) ^ ROR64(x, 8) ^ SHR64(x, 7)) -#define SIGMA4(x) (ROR64(x, 19) ^ ROR64(x, 61) ^ SHR64(x, 6)) 
- -//Rotate right operation -#define ROR64(a, n) _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(a, 64 - n)) - -//Shift right operation -#define SHR64(a, n) _mm_srli_epi64(a, n) - -__m128i mm_htobe_epi64(__m128i a) { - __m128i mask = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); - return _mm_shuffle_epi8(a, mask); -} - -__m128i mm_betoh_epi64(__m128i a) { - return mm_htobe_epi64(a); -} - -//SHA-512 padding -static const uint8_t padding[128] = -{ - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -//SHA-512 constants -static const uint64_t k[80] = -{ - 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, - 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, - 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, - 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694, - 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, - 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, - 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, - 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70, - 0x27B70A8546D22FFC, 
0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, - 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B, - 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30, - 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8, - 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, - 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, - 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC, - 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, - 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, - 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B, - 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, - 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 -}; - - -void sha512Compute32b_parallel(uint64_t *data[SHA512_PARALLEL_N], uint64_t *digest[SHA512_PARALLEL_N]) { - Sha512Context context[2]; - context[0].h[0] = _mm_set1_epi64x(0x6A09E667F3BCC908); - context[0].h[1] = _mm_set1_epi64x(0xBB67AE8584CAA73B); - context[0].h[2] = _mm_set1_epi64x(0x3C6EF372FE94F82B); - context[0].h[3] = _mm_set1_epi64x(0xA54FF53A5F1D36F1); - context[0].h[4] = _mm_set1_epi64x(0x510E527FADE682D1); - context[0].h[5] = _mm_set1_epi64x(0x9B05688C2B3E6C1F); - context[0].h[6] = _mm_set1_epi64x(0x1F83D9ABFB41BD6B); - context[0].h[7] = _mm_set1_epi64x(0x5BE0CD19137E2179); - - context[1].h[0] = _mm_set1_epi64x(0x6A09E667F3BCC908); - context[1].h[1] = _mm_set1_epi64x(0xBB67AE8584CAA73B); - context[1].h[2] = _mm_set1_epi64x(0x3C6EF372FE94F82B); - context[1].h[3] = _mm_set1_epi64x(0xA54FF53A5F1D36F1); - context[1].h[4] = _mm_set1_epi64x(0x510E527FADE682D1); - context[1].h[5] = _mm_set1_epi64x(0x9B05688C2B3E6C1F); - context[1].h[6] = _mm_set1_epi64x(0x1F83D9ABFB41BD6B); - context[1].h[7] = 
_mm_set1_epi64x(0x5BE0CD19137E2179); - - for(int i=0; i<4; ++i) { - context[0].w[i] = _mm_set_epi64x ( data[1][i], data[0][i] ); - context[1].w[i] = _mm_set_epi64x ( data[3][i], data[2][i] ); - } - for(int i=0; i<10; ++i) { - context[0].w[i+4] = _mm_set1_epi64x( ((uint64_t*)padding)[i] ); - context[1].w[i+4] = _mm_set1_epi64x( ((uint64_t*)padding)[i] ); - } - - //Length of the original message (before padding) - uint64_t totalSize = 32 * 8; - - //Append the length of the original message - context[0].w[14] = _mm_set1_epi64x(0); - context[0].w[15] = _mm_set1_epi64x(htobe64(totalSize)); - - context[1].w[14] = _mm_set1_epi64x(0); - context[1].w[15] = _mm_set1_epi64x(htobe64(totalSize)); - - //Calculate the message digest - sha512ProcessBlock(context); - - //Convert from host byte order to big-endian byte order - for (int i = 0; i < 8; i++) { - context[0].h[i] = mm_htobe_epi64(context[0].h[i]); - context[1].h[i] = mm_htobe_epi64(context[1].h[i]); - } - - //Copy the resulting digest - for(int i=0; i<8; ++i) { - digest[0][i] = _mm_extract_epi64(context[0].h[i], 0); - digest[1][i] = _mm_extract_epi64(context[0].h[i], 1); - digest[2][i] = _mm_extract_epi64(context[1].h[i], 0); - digest[3][i] = _mm_extract_epi64(context[1].h[i], 1); - } -} - -#define blk0(n, i) (block[n][i] = mm_betoh_epi64(block[n][i])) -#define blk(n, i) (block[n][i] = block[n][i - 16] + SIGMA3(block[n][i - 15]) + \ - SIGMA4(block[n][i - 2]) + block[n][i - 7]) - -#define ROUND512(a,b,c,d,e,f,g,h) \ - T0 += (h[0]) + SIGMA2(e[0]) + Ch((e[0]), (f[0]), (g[0])) + k[i]; \ - T1 += (h[1]) + SIGMA2(e[1]) + Ch((e[1]), (f[1]), (g[1])) + k[i]; \ - (d[0]) += T0; \ - (d[1]) += T1; \ - (h[0]) = T0 + SIGMA1(a[0]) + Maj((a[0]), (b[0]), (c[0])); \ - (h[1]) = T1 + SIGMA1(a[1]) + Maj((a[1]), (b[1]), (c[1])); \ - i++ - -#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ - T0 = blk0(0, i); \ - T1 = blk0(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define ROUND512_16_TO_80(a,b,c,d,e,f,g,h) \ - T0 = blk(0, i); \ - T1 = blk(1, i); \ - 
ROUND512(a,b,c,d,e,f,g,h) - -#define R512_0 \ - ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \ - ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \ - ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \ - ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \ - ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \ - ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \ - ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \ - ROUND512_0_TO_15(b, c, d, e, f, g, h, a) - -#define R512_16 \ - ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \ - ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \ - ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \ - ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \ - ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \ - ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \ - ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \ - ROUND512_16_TO_80(b, c, d, e, f, g, h, a) - -#define INIT(x,n) \ - x[0] = context[0].h[n]; \ - x[1] = context[1].h[n]; \ - -void sha512ProcessBlock(Sha512Context context[2]) -{ - __m128i* block[2]; - block[0] = context[0].w; - block[1] = context[1].w; - - __m128i T0, T1; - __m128i a[2], b[2], c[2], d[2], e[2], f[2], g[2], h[2]; - INIT(a, 0) - INIT(b, 1) - INIT(c, 2) - INIT(d, 3) - INIT(e, 4) - INIT(f, 5) - INIT(g, 6) - INIT(h, 7) - - int i = 0; - R512_0; R512_0; - for(int j=0; j<8; ++j) { - R512_16; - } - - context[0].h[0] += a[0]; - context[0].h[1] += b[0]; - context[0].h[2] += c[0]; - context[0].h[3] += d[0]; - context[0].h[4] += e[0]; - context[0].h[5] += f[0]; - context[0].h[6] += g[0]; - context[0].h[7] += h[0]; - - context[1].h[0] += a[1]; - context[1].h[1] += b[1]; - context[1].h[2] += c[1]; - context[1].h[3] += d[1]; - context[1].h[4] += e[1]; - context[1].h[5] += f[1]; - context[1].h[6] += g[1]; - context[1].h[7] += h[1]; -} - -#endif // __AVX__ -#endif // __AVX2__ diff --git a/algo/hodl/sha512_avx2.c b/algo/hodl/sha512_avx2.c deleted file mode 100644 index 58e421c7..00000000 --- a/algo/hodl/sha512_avx2.c +++ /dev/null @@ -1,241 +0,0 @@ -#ifdef __AVX2__ - -//Dependencies -#include -#include - -#ifdef 
__FreeBSD__ -#include -#endif - -#if defined(__CYGWIN__) -#include -#endif - -#include "tmmintrin.h" -#include "smmintrin.h" -#include "immintrin.h" - -#include "sha512-avx.h" -#if ((defined(_WIN64) || defined(__WINDOWS__))) -#include "hodl-endian.h" -#endif - -//SHA-512 auxiliary functions -#define Ch(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define SIGMA1(x) (ROR64(x, 28) ^ ROR64(x, 34) ^ ROR64(x, 39)) -#define SIGMA2(x) (ROR64(x, 14) ^ ROR64(x, 18) ^ ROR64(x, 41)) -#define SIGMA3(x) (ROR64(x, 1) ^ ROR64(x, 8) ^ SHR64(x, 7)) -#define SIGMA4(x) (ROR64(x, 19) ^ ROR64(x, 61) ^ SHR64(x, 6)) - -//Rotate right operation -#define ROR64(a, n) _mm256_or_si256(_mm256_srli_epi64(a, n), _mm256_slli_epi64(a, 64 - n)) - -//Shift right operation -#define SHR64(a, n) _mm256_srli_epi64(a, n) - -__m256i mm256_htobe_epi64(__m256i a) { - __m256i mask = _mm256_set_epi8( - 24,25,26,27,28,29,30,31, - 16,17,18,19,20,21,22,23, - 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7); - return _mm256_shuffle_epi8(a, mask); -} - -__m256i mm256_betoh_epi64(__m256i a) { - return mm256_htobe_epi64(a); -} - -//SHA-512 padding -static const uint8_t padding[128] = -{ - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00 -}; - -//SHA-512 constants -static const uint64_t k[80] = -{ - 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, - 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, - 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, - 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694, - 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, - 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, - 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, - 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70, - 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, - 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B, - 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30, - 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8, - 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, - 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, - 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC, - 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, - 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, - 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B, - 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, - 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 -}; - - -void sha512Compute32b_parallel(uint64_t *data[SHA512_PARALLEL_N], uint64_t *digest[SHA512_PARALLEL_N]) { - Sha512Context context[2]; - context[0].h[0] = _mm256_set1_epi64x(0x6A09E667F3BCC908); - context[0].h[1] = _mm256_set1_epi64x(0xBB67AE8584CAA73B); - context[0].h[2] = 
_mm256_set1_epi64x(0x3C6EF372FE94F82B); - context[0].h[3] = _mm256_set1_epi64x(0xA54FF53A5F1D36F1); - context[0].h[4] = _mm256_set1_epi64x(0x510E527FADE682D1); - context[0].h[5] = _mm256_set1_epi64x(0x9B05688C2B3E6C1F); - context[0].h[6] = _mm256_set1_epi64x(0x1F83D9ABFB41BD6B); - context[0].h[7] = _mm256_set1_epi64x(0x5BE0CD19137E2179); - - context[1].h[0] = _mm256_set1_epi64x(0x6A09E667F3BCC908); - context[1].h[1] = _mm256_set1_epi64x(0xBB67AE8584CAA73B); - context[1].h[2] = _mm256_set1_epi64x(0x3C6EF372FE94F82B); - context[1].h[3] = _mm256_set1_epi64x(0xA54FF53A5F1D36F1); - context[1].h[4] = _mm256_set1_epi64x(0x510E527FADE682D1); - context[1].h[5] = _mm256_set1_epi64x(0x9B05688C2B3E6C1F); - context[1].h[6] = _mm256_set1_epi64x(0x1F83D9ABFB41BD6B); - context[1].h[7] = _mm256_set1_epi64x(0x5BE0CD19137E2179); - - for(int i=0; i<4; ++i) { - context[0].w[i] = _mm256_set_epi64x ( data[3][i], data[2][i], data[1][i], data[0][i] ); - context[1].w[i] = _mm256_set_epi64x ( data[7][i], data[6][i], data[5][i], data[4][i] ); - } - for(int i=0; i<10; ++i) { - context[0].w[i+4] = _mm256_set1_epi64x( ((uint64_t*)padding)[i] ); - context[1].w[i+4] = _mm256_set1_epi64x( ((uint64_t*)padding)[i] ); - } - - //Length of the original message (before padding) - uint64_t totalSize = 32 * 8; - - //Append the length of the original message - context[0].w[14] = _mm256_set1_epi64x(0); - context[0].w[15] = _mm256_set1_epi64x(htobe64(totalSize)); - - context[1].w[14] = _mm256_set1_epi64x(0); - context[1].w[15] = _mm256_set1_epi64x(htobe64(totalSize)); - - //Calculate the message digest - sha512ProcessBlock(context); - - //Convert from host byte order to big-endian byte order - for (int i = 0; i < 8; i++) { - context[0].h[i] = mm256_htobe_epi64(context[0].h[i]); - context[1].h[i] = mm256_htobe_epi64(context[1].h[i]); - } - - //Copy the resulting digest - for(int i=0; i<8; ++i) { - digest[0][i] = _mm256_extract_epi64(context[0].h[i], 0); - digest[1][i] = _mm256_extract_epi64(context[0].h[i], 
1); - digest[2][i] = _mm256_extract_epi64(context[0].h[i], 2); - digest[3][i] = _mm256_extract_epi64(context[0].h[i], 3); - - digest[4][i] = _mm256_extract_epi64(context[1].h[i], 0); - digest[5][i] = _mm256_extract_epi64(context[1].h[i], 1); - digest[6][i] = _mm256_extract_epi64(context[1].h[i], 2); - digest[7][i] = _mm256_extract_epi64(context[1].h[i], 3); - } -} - -#define blk0(n, i) (block[n][i] = mm256_betoh_epi64(block[n][i])) -#define blk(n, i) (block[n][i] = block[n][i - 16] + SIGMA3(block[n][i - 15]) + \ - SIGMA4(block[n][i - 2]) + block[n][i - 7]) - -#define ROUND512(a,b,c,d,e,f,g,h) \ - T0 += (h[0]) + SIGMA2(e[0]) + Ch((e[0]), (f[0]), (g[0])) + k[i]; \ - T1 += (h[1]) + SIGMA2(e[1]) + Ch((e[1]), (f[1]), (g[1])) + k[i]; \ - (d[0]) += T0; \ - (d[1]) += T1; \ - (h[0]) = T0 + SIGMA1(a[0]) + Maj((a[0]), (b[0]), (c[0])); \ - (h[1]) = T1 + SIGMA1(a[1]) + Maj((a[1]), (b[1]), (c[1])); \ - i++ - -#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ - T0 = blk0(0, i); \ - T1 = blk0(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define ROUND512_16_TO_80(a,b,c,d,e,f,g,h) \ - T0 = blk(0, i); \ - T1 = blk(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define R512_0 \ - ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \ - ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \ - ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \ - ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \ - ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \ - ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \ - ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \ - ROUND512_0_TO_15(b, c, d, e, f, g, h, a) - -#define R512_16 \ - ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \ - ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \ - ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \ - ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \ - ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \ - ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \ - ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \ - ROUND512_16_TO_80(b, c, d, e, f, g, h, a) - -#define INIT(x,n) \ - x[0] = context[0].h[n]; \ - x[1] = context[1].h[n]; \ - 
-void sha512ProcessBlock(Sha512Context context[2]) -{ - __m256i* block[2]; - block[0] = context[0].w; - block[1] = context[1].w; - - __m256i T0, T1; - __m256i a[2], b[2], c[2], d[2], e[2], f[2], g[2], h[2]; - INIT(a, 0) - INIT(b, 1) - INIT(c, 2) - INIT(d, 3) - INIT(e, 4) - INIT(f, 5) - INIT(g, 6) - INIT(h, 7) - - int i = 0; - R512_0; R512_0; - for(int j=0; j<8; ++j) { - R512_16; - } - - context[0].h[0] += a[0]; - context[0].h[1] += b[0]; - context[0].h[2] += c[0]; - context[0].h[3] += d[0]; - context[0].h[4] += e[0]; - context[0].h[5] += f[0]; - context[0].h[6] += g[0]; - context[0].h[7] += h[0]; - - context[1].h[0] += a[1]; - context[1].h[1] += b[1]; - context[1].h[2] += c[1]; - context[1].h[3] += d[1]; - context[1].h[4] += e[1]; - context[1].h[5] += f[1]; - context[1].h[6] += g[1]; - context[1].h[7] += h[1]; -} - -#endif // __AVX2__ diff --git a/algo/hodl/wolf-aes.h b/algo/hodl/wolf-aes.h deleted file mode 100644 index 7aa63644..00000000 --- a/algo/hodl/wolf-aes.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __WOLF_AES_H -#define __WOLF_AES_H - -#include -#include "simd-utils.h" - -void ExpandAESKey256(v128_t *keys, const v128_t *KeyBuf); - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -#define AES_PARALLEL_N 8 -#define BLOCK_COUNT 256 - -void AES256CBC( v128_t** data, const v128_t** next, v128_t ExpandedKey[][16], - v128_t* IV ); - -#else - -void AES256CBC( v128_t *Ciphertext, const v128_t *Plaintext, - const v128_t *ExpandedKey, v128_t IV, uint32_t BlockCount ); - -#endif - -#endif // __WOLF_AES_H diff --git a/algo/m7m/m7m.c b/algo/m7m/m7m.c index 2bf4a11f..f5660152 100644 --- a/algo/m7m/m7m.c +++ b/algo/m7m/m7m.c @@ -1,6 +1,8 @@ #include "cpuminer-config.h" #include "algo-gate-api.h" +#if !defined(__APPLE__) + #include #include #include @@ -296,8 +298,14 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce, return 0; } +#endif // not apple + bool register_m7m_algo( algo_gate_t *gate ) { +#if defined(__APPLE__) + applog( LOG_ERR, "M7M algo is not supported 
on MacOS"); + return false; +#else gate->optimizations = SHA_OPT; init_m7m_ctx(); gate->scanhash = (void*)&scanhash_m7m_hash; @@ -307,6 +315,6 @@ bool register_m7m_algo( algo_gate_t *gate ) gate->set_work_data_endian = (void*)&set_work_data_big_endian; opt_target_factor = 65536.0; return true; +#endif } - diff --git a/algo/sha/sha1-hash.c b/algo/sha/sha1-hash.c new file mode 100644 index 00000000..6f1928df --- /dev/null +++ b/algo/sha/sha1-hash.c @@ -0,0 +1,390 @@ +#include "simd-utils.h" +#include +#include "sha1-hash.h" + +#if defined(__x86_64__) && defined(__SHA__) + +#define sha1_opt_rounds( state_out, data, state_in ) \ +{ \ + __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; \ + __m128i MSG0, MSG1, MSG2, MSG3; \ +\ + ABCD = _mm_load_si128( (const __m128i*) state_in ); \ + E0 = _mm_set_epi32( state_in[4], 0, 0, 0 ); \ + ABCD = _mm_shuffle_epi32( ABCD, 0x1B ); \ +\ + ABCD_SAVE = ABCD; \ + E0_SAVE = E0; \ +\ + /* Rounds 0-3 */ \ + MSG0 = load_msg( data, 0 ); \ + E0 = _mm_add_epi32( E0, MSG0 ); \ + E1 = ABCD; \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \ +\ + /* Rounds 4-7 */ \ + MSG1 = load_msg( data, 1 ); \ + E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \ + E0 = ABCD; \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 0 ); \ + MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \ +\ + /* Rounds 8-11 */ \ + MSG2 = load_msg( data, 2 ); \ + E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \ + E1 = ABCD; \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \ + MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \ + MSG0 = _mm_xor_si128( MSG0, MSG2 ); \ +\ + /* Rounds 12-15 */ \ + MSG3 = load_msg( data, 3 ); \ + E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \ + E0 = ABCD; \ + MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 0 ); \ + MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \ + MSG1 = _mm_xor_si128( MSG1, MSG3 ); \ +\ + /* Rounds 16-19 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \ + E1 = ABCD; \ + MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 0 ); \ + MSG3 = 
_mm_sha1msg1_epu32( MSG3, MSG0 ); \ + MSG2 = _mm_xor_si128( MSG2, MSG0 ); \ +\ + /* Rounds 20-23 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \ + E0 = ABCD; \ + MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \ + MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \ + MSG3 = _mm_xor_si128( MSG3, MSG1 ); \ +\ + /* Rounds 24-27 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \ + E1 = ABCD; \ + MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 1 ); \ + MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \ + MSG0 = _mm_xor_si128( MSG0, MSG2 ); \ +\ + /* Rounds 28-31 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \ + E0 = ABCD; \ + MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \ + MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \ + MSG1 = _mm_xor_si128( MSG1, MSG3 ); \ +\ + /* Rounds 32-35 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \ + E1 = ABCD; \ + MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 1 ); \ + MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \ + MSG2 = _mm_xor_si128( MSG2, MSG0 ); \ +\ + /* Rounds 36-39 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \ + E0 = ABCD; \ + MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 1 ); \ + MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \ + MSG3 = _mm_xor_si128( MSG3, MSG1 ); \ +\ + /* Rounds 40-43 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \ + E1 = ABCD; \ + MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \ + MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \ + MSG0 = _mm_xor_si128( MSG0, MSG2 ); \ +\ + /* Rounds 44-47 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \ + E0 = ABCD; \ + MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 2 ); \ + MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \ + MSG1 = _mm_xor_si128( MSG1, MSG3 ); \ +\ + /* Rounds 48-51 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \ + E1 = ABCD; \ + MSG1 = 
_mm_sha1msg2_epu32( MSG1, MSG0 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \ + MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \ + MSG2 = _mm_xor_si128( MSG2, MSG0 ); \ + E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \ + E1 = ABCD; \ + MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \ + MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \ + MSG2 = _mm_xor_si128( MSG2, MSG0 ); \ +\ + /* Rounds 52-55 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \ + E0 = ABCD; \ + MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 2 ); \ + MSG0 = _mm_sha1msg1_epu32( MSG0, MSG1 ); \ + MSG3 = _mm_xor_si128( MSG3, MSG1 ); \ +\ + /* Rounds 56-59 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \ + E1 = ABCD; \ + MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 2 ); \ + MSG1 = _mm_sha1msg1_epu32( MSG1, MSG2 ); \ + MSG0 = _mm_xor_si128( MSG0, MSG2 ); \ +\ + /* Rounds 60-63 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \ + E0 = ABCD; \ + MSG0 = _mm_sha1msg2_epu32( MSG0, MSG3 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \ + MSG2 = _mm_sha1msg1_epu32( MSG2, MSG3 ); \ + MSG1 = _mm_xor_si128( MSG1, MSG3 ); \ +\ + /* Rounds 64-67 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG0 ); \ + E1 = ABCD; \ + MSG1 = _mm_sha1msg2_epu32( MSG1, MSG0 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 3 ); \ + MSG3 = _mm_sha1msg1_epu32( MSG3, MSG0 ); \ + MSG2 = _mm_xor_si128( MSG2, MSG0 ); \ +\ + /* Rounds 68-71 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG1 ); \ + E0 = ABCD; \ + MSG2 = _mm_sha1msg2_epu32( MSG2, MSG1 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \ + MSG3 = _mm_xor_si128( MSG3, MSG1 ); \ +\ + /* Rounds 72-75 */ \ + E0 = _mm_sha1nexte_epu32( E0, MSG2 ); \ + E1 = ABCD; \ + MSG3 = _mm_sha1msg2_epu32( MSG3, MSG2 ); \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E0, 3 ); \ +\ + /* Rounds 76-79 */ \ + E1 = _mm_sha1nexte_epu32( E1, MSG3 ); \ + E0 = ABCD; \ + ABCD = _mm_sha1rnds4_epu32( ABCD, E1, 3 ); \ +\ + /* Combine state */ \ + E0 = 
_mm_sha1nexte_epu32( E0, E0_SAVE ); \ + ABCD = _mm_add_epi32( ABCD, ABCD_SAVE ); \ +\ + /* Save state */ \ + ABCD = _mm_shuffle_epi32( ABCD, 0x1B ); \ + _mm_store_si128( (__m128i*) state_out, ABCD ); \ + state_out[4] = _mm_extract_epi32( E0, 3 ); \ +} + + +void sha1_x86_sha_transform_le( uint32_t *state_out, const void *input, + const uint32_t *state_in ) +{ +#define load_msg( m, i ) casti_v128( m, i ) + sha1_opt_rounds( state_out, input, state_in ); +#undef load_msg +} + +void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input, + const uint32_t *state_in ) +{ + const __m128i MASK = _mm_set_epi64x( 0x0001020304050607ULL, + 0x08090a0b0c0d0e0fULL ); +#define load_msg( m, i ) _mm_shuffle_epi8( casti_v128( m, i ), MASK ) + sha1_opt_rounds( state_out, input, state_in ); +#undef load_msg +} + +#endif + +#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2) + +#define sha1_neon_rounds( state_out, data, state_in ) \ +{ \ + uint32x4_t ABCD, ABCD_SAVED; \ + uint32x4_t TMP0, TMP1; \ + uint32x4_t MSG0, MSG1, MSG2, MSG3; \ + uint32_t E0, E0_SAVED, E1; \ +\ + /* Load state */ \ + ABCD = vld1q_u32( &state_in[0] ); \ + E0 = state_in[4]; \ +\ + /* Save state */ \ + ABCD_SAVED = ABCD; \ + E0_SAVED = E0; \ +\ + MSG0 = load_msg( data, 0 ); \ + MSG1 = load_msg( data, 1 ); \ + MSG2 = load_msg( data, 2 ); \ + MSG3 = load_msg( data, 3 ); \ +\ + TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x5A827999 ) ); \ + TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x5A827999 ) ); \ +\ + /* Rounds 0-3 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x5A827999 ) ); \ + MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \ +\ + /* Rounds 4-7 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1cq_u32(ABCD, E1, TMP1); \ + TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x5A827999 ) ); \ + MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \ + MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \ +\ + /* Rounds 8-11 */ \ + E1 = vsha1h_u32( 
vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x5A827999 ) ); \ + MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \ + MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \ +\ + /* Rounds 12-15 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1cq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x6ED9EBA1 ) ); \ + MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \ + MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \ +\ + /* Rounds 16-19 */\ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1cq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x6ED9EBA1 ) ); \ + MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \ + MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \ +\ + /* Rounds 20-23 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x6ED9EBA1 ) ); \ + MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \ + MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \ +\ + /* Rounds 24-27 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x6ED9EBA1 ) ); \ + MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \ + MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \ +\ + /* Rounds 28-31 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x6ED9EBA1 ) ); \ + MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \ + MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \ +\ + /* Rounds 32-35 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x8F1BBCDC ) ); \ + MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \ + MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \ +\ + /* Rounds 36-39 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0x8F1BBCDC ) ); \ + MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \ + 
MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \ +\ + /* Rounds 40-43 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0x8F1BBCDC ) ); \ + MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \ + MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \ +\ + /* Rounds 44-47 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1mq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0x8F1BBCDC ) ); \ + MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \ + MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \ +\ + /* Rounds 48-51 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG2, vdupq_n_u32( 0x8F1BBCDC ) ); \ + MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \ + MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \ +\ + /* Rounds 52-55 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1mq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG3, vdupq_n_u32( 0xCA62C1D6 ) ); \ + MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \ + MSG1 = vsha1su0q_u32( MSG1, MSG2, MSG3 ); \ +\ + /* Rounds 56-59 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1mq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32( MSG0, vdupq_n_u32( 0xCA62C1D6 ) ); \ + MSG1 = vsha1su1q_u32( MSG1, MSG0 ); \ + MSG2 = vsha1su0q_u32( MSG2, MSG3, MSG0 ); \ +\ + /* Rounds 60-63 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ + TMP1 = vaddq_u32( MSG1, vdupq_n_u32( 0xCA62C1D6 ) ); \ + MSG2 = vsha1su1q_u32( MSG2, MSG1 ); \ + MSG3 = vsha1su0q_u32( MSG3, MSG0, MSG1 ); \ +\ + /* Rounds 64-67 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \ + TMP0 = vaddq_u32(MSG2, vdupq_n_u32( 0xCA62C1D6 ) ); \ + MSG3 = vsha1su1q_u32( MSG3, MSG2 ); \ + MSG0 = vsha1su0q_u32( MSG0, MSG1, MSG2 ); \ +\ + /* Rounds 68-71 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ + TMP1 = 
vaddq_u32( MSG3, vdupq_n_u32( 0xCA62C1D6 ) ); \ + MSG0 = vsha1su1q_u32( MSG0, MSG3 ); \ +\ + /* Rounds 72-75 */ \ + E1 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E0, TMP0 ); \ +\ + /* Rounds 76-79 */ \ + E0 = vsha1h_u32( vgetq_lane_u32( ABCD, 0 ) ); \ + ABCD = vsha1pq_u32( ABCD, E1, TMP1 ); \ +\ + /* Combine state */ \ + E0 += E0_SAVED; \ + ABCD = vaddq_u32( ABCD_SAVED, ABCD ); \ +\ + /* Save state */ \ + vst1q_u32( &state_out[0], ABCD ); \ + state_out[4] = E0; \ +} + +void sha1_neon_sha_transform_be( uint32_t *state_out, const void *input, + const uint32_t *state_in ) +{ +#define load_msg( m, i ) v128_bswap32( casti_v128( m, i ) ); + sha1_neon_rounds( state_out, input, state_in ); +#undef load_msg +} + +void sha1_neon_sha_transform_le( uint32_t *state_out, const void *input, + const uint32_t *state_in ) +{ +#define load_msg( m, i ) casti_v128( m, i ); + sha1_neon_rounds( state_out, input, state_in ); +#undef load_msg +} + +#endif diff --git a/algo/sha/sha1-hash.h b/algo/sha/sha1-hash.h new file mode 100644 index 00000000..b46da3cb --- /dev/null +++ b/algo/sha/sha1-hash.h @@ -0,0 +1,40 @@ +#ifndef SHA1_HASH_H__ +#define SHA1_HASH_H__ 1 + +#include <stdint.h> +#include "simd-utils.h" +#include "cpuminer-config.h" +#include "sph_sha1.h" + +// SHA hooks for sha1, automatically substituted in SPH +#if defined(__x86_64__) && defined(__SHA__) + +void sha1_x86_sha_transform_le( uint32_t *state_out, const void *input, + const uint32_t *state_in ); + +void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input, + const uint32_t *state_in ); + +#define sha1_transform_le sha1_x86_sha_transform_le +#define sha1_transform_be sha1_x86_sha_transform_be + +#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2) + +void sha1_neon_sha_transform_be( uint32_t *state_out, const void *input, + const uint32_t *state_in ); +void sha1_neon_sha_transform_le( uint32_t *state_out, const void *input, + const uint32_t *state_in ); + +#define sha1_transform_le
sha1_neon_sha_transform_le +#define sha1_transform_be sha1_neon_sha_transform_be + +#else + +#define sha1_transform_le sph_sha1_transform_le +#define sha1_transform_be sph_sha1_transform_be + +#endif + +#define sha1_full sph_sha1_full + +#endif diff --git a/algo/sha/sha1.c b/algo/sha/sha1.c new file mode 100644 index 00000000..453592f4 --- /dev/null +++ b/algo/sha/sha1.c @@ -0,0 +1,400 @@ +/* $Id: sha1.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * SHA-1 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include +#include "simd-utils.h" +#include "sha1-hash.h" + +#define F(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) +#define G(B, C, D) ((B) ^ (C) ^ (D)) +#define H(B, C, D) (((D) & (C)) | (((D) | (C)) & (B))) +#define I(B, C, D) G(B, C, D) + +#define ROTL rol32 +//#define ROTL SPH_ROTL32 + +#define K1 SPH_C32(0x5A827999) +#define K2 SPH_C32(0x6ED9EBA1) +#define K3 SPH_C32(0x8F1BBCDC) +#define K4 SPH_C32(0xCA62C1D6) + +static const sph_u32 IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), + SPH_C32(0x98BADCFE), SPH_C32(0x10325476), + SPH_C32(0xC3D2E1F0) +}; + +/* + * This macro defines the body for a SHA-1 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "r" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +#define SHA1_ROUND_BODY(in, r) do { \ + sph_u32 A, B, C, D, E; \ + sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \ + sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + \ + W00 = in(0); \ + E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W00 + K1); \ + B = ROTL(B, 30); \ + W01 = in(1); \ + D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W01 + K1); \ + A = ROTL(A, 30); \ + W02 = in(2); \ + C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W02 + K1); \ + E = ROTL(E, 30); \ + W03 = in(3); \ + B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W03 + K1); \ + D = ROTL(D, 30); \ + W04 = in(4); \ + A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W04 + K1); \ + C = ROTL(C, 30); \ + W05 = in(5); \ + E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W05 + K1); \ + B = ROTL(B, 30); \ + W06 = in(6); \ + D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W06 + K1); \ + A = ROTL(A, 30); \ + W07 = in(7); \ + C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W07 + K1); \ + E = ROTL(E, 30); \ + W08 = in(8); \ + B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W08 + K1); \ + D = ROTL(D, 30); \ + W09 = in(9); \ + A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W09 + K1); \ + C = ROTL(C, 30); \ + W10 = in(10); \ + E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W10 + K1); \ + B = ROTL(B, 30); \ + W11 = in(11); \ + D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W11 + K1); \ + A = ROTL(A, 30); \ + W12 = in(12); \ + C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W12 + K1); \ + E = ROTL(E, 30); \ + W13 = in(13); \ + B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W13 + K1); \ + D = ROTL(D, 30); \ + W14 = in(14); \ + A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W14 + K1); \ + C = ROTL(C, 30); \ + W15 = in(15); \ + E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W15 + K1); \ + B = ROTL(B, 30); \ + W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ + D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W00 + K1); \ + A = ROTL(A, 30); \ + W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ + C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + 
W01 + K1); \ + E = ROTL(E, 30); \ + W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ + B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W02 + K1); \ + D = ROTL(D, 30); \ + W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ + A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W03 + K1); \ + C = ROTL(C, 30); \ + W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ + E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W04 + K2); \ + B = ROTL(B, 30); \ + W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ + D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W05 + K2); \ + A = ROTL(A, 30); \ + W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ + C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W06 + K2); \ + E = ROTL(E, 30); \ + W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ + B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W07 + K2); \ + D = ROTL(D, 30); \ + W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ + A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W08 + K2); \ + C = ROTL(C, 30); \ + W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ + E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W09 + K2); \ + B = ROTL(B, 30); \ + W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ + D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W10 + K2); \ + A = ROTL(A, 30); \ + W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ + C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W11 + K2); \ + E = ROTL(E, 30); \ + W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ + B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W12 + K2); \ + D = ROTL(D, 30); \ + W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ + A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W13 + K2); \ + C = ROTL(C, 30); \ + W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ + E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W14 + K2); \ + B = ROTL(B, 30); \ + W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ + D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W15 + K2); \ + A = ROTL(A, 30); \ + W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ + C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W00 + K2); \ + E = ROTL(E, 30); \ + W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ + B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W01 + K2); \ + D = ROTL(D, 30); \ + W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ + A 
= SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W02 + K2); \ + C = ROTL(C, 30); \ + W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ + E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W03 + K2); \ + B = ROTL(B, 30); \ + W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ + D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W04 + K2); \ + A = ROTL(A, 30); \ + W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ + C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W05 + K2); \ + E = ROTL(E, 30); \ + W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ + B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W06 + K2); \ + D = ROTL(D, 30); \ + W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ + A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W07 + K2); \ + C = ROTL(C, 30); \ + W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ + E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W08 + K3); \ + B = ROTL(B, 30); \ + W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ + D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W09 + K3); \ + A = ROTL(A, 30); \ + W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ + C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W10 + K3); \ + E = ROTL(E, 30); \ + W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ + B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W11 + K3); \ + D = ROTL(D, 30); \ + W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ + A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W12 + K3); \ + C = ROTL(C, 30); \ + W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ + E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W13 + K3); \ + B = ROTL(B, 30); \ + W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ + D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W14 + K3); \ + A = ROTL(A, 30); \ + W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ + C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W15 + K3); \ + E = ROTL(E, 30); \ + W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ + B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W00 + K3); \ + D = ROTL(D, 30); \ + W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ + A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W01 + K3); \ + C = ROTL(C, 30); \ + W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ + E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W02 + K3); \ + B = ROTL(B, 30); \ + W03 
= ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ + D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W03 + K3); \ + A = ROTL(A, 30); \ + W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ + C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W04 + K3); \ + E = ROTL(E, 30); \ + W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ + B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W05 + K3); \ + D = ROTL(D, 30); \ + W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ + A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W06 + K3); \ + C = ROTL(C, 30); \ + W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ + E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W07 + K3); \ + B = ROTL(B, 30); \ + W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ + D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W08 + K3); \ + A = ROTL(A, 30); \ + W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ + C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W09 + K3); \ + E = ROTL(E, 30); \ + W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ + B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W10 + K3); \ + D = ROTL(D, 30); \ + W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ + A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W11 + K3); \ + C = ROTL(C, 30); \ + W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ + E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W12 + K4); \ + B = ROTL(B, 30); \ + W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ + D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W13 + K4); \ + A = ROTL(A, 30); \ + W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ + C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W14 + K4); \ + E = ROTL(E, 30); \ + W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ + B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W15 + K4); \ + D = ROTL(D, 30); \ + W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ + A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W00 + K4); \ + C = ROTL(C, 30); \ + W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ + E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W01 + K4); \ + B = ROTL(B, 30); \ + W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ + D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W02 + K4); \ + A = ROTL(A, 30); \ + W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ + C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + 
W03 + K4); \ + E = ROTL(E, 30); \ + W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ + B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W04 + K4); \ + D = ROTL(D, 30); \ + W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ + A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W05 + K4); \ + C = ROTL(C, 30); \ + W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ + E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W06 + K4); \ + B = ROTL(B, 30); \ + W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ + D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W07 + K4); \ + A = ROTL(A, 30); \ + W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ + C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W08 + K4); \ + E = ROTL(E, 30); \ + W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ + B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W09 + K4); \ + D = ROTL(D, 30); \ + W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ + A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W10 + K4); \ + C = ROTL(C, 30); \ + W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ + E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W11 + K4); \ + B = ROTL(B, 30); \ + W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ + D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W12 + K4); \ + A = ROTL(A, 30); \ + W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ + C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W13 + K4); \ + E = ROTL(E, 30); \ + W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ + B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W14 + K4); \ + D = ROTL(D, 30); \ + W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ + A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W15 + K4); \ + C = ROTL(C, 30); \ + \ + (r)[0] = SPH_T32(r[0] + A); \ + (r)[1] = SPH_T32(r[1] + B); \ + (r)[2] = SPH_T32(r[2] + C); \ + (r)[3] = SPH_T32(r[3] + D); \ + (r)[4] = SPH_T32(r[4] + E); \ + } while (0) + +/* + * One round of SHA-1. The data must be aligned for 32-bit access. 
+ */ +#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__aarch64__) && defined(__ARM_FEATURE_SHA2) ) + +static void +sha1_round( const unsigned char *data, sph_u32 r[5] ) +{ + sha1_transform_be( (uint32_t*)r, (uint32_t*)data, (const uint32_t*)r ); +} + +#else + +static void +sha1_round( const unsigned char *data, sph_u32 r[5] ) +{ +#define SHA1_IN(x) sph_dec32be_aligned(data + (4 * (x))) + SHA1_ROUND_BODY(SHA1_IN, r); +#undef SHA1_IN +} + +#endif + +/* see sph_sha1.h */ +void +sph_sha1_init(void *cc) +{ + sph_sha1_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof IV); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN sha1_round +#define HASH sha1 +#define BE32 1 +#include "md_helper.c" + +/* see sph_sha1.h */ +void +sph_sha1_close(void *cc, void *dst) +{ + sha1_close(cc, dst, 5); + sph_sha1_init(cc); +} + +/* see sph_sha1.h */ +void +sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha1_addbits_and_close(cc, ub, n, dst, 5); + sph_sha1_init(cc); +} + +/* see sph_sha1.h */ +void +sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5]) +{ +#define SHA1_IN(x) msg[x] + SHA1_ROUND_BODY(SHA1_IN, val); +#undef SHA1_IN +} + + +void sph_sha1_full( void *hash, const void *msg, size_t len ) +{ + sph_sha1_context cc; + sph_sha1_init( &cc ); + sph_sha1( &cc, msg, len ); + sph_sha1_close( &cc, hash ); +} diff --git a/algo/sha/sph_sha1.h b/algo/sha/sph_sha1.h new file mode 100644 index 00000000..f3834da3 --- /dev/null +++ b/algo/sha/sph_sha1.h @@ -0,0 +1,133 @@ +/* $Id: sph_sha1.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * SHA-1 interface. + * + * SHA-1 is described in FIPS 180-1 (now superseded by FIPS 180-2, but the + * description of SHA-1 is still included and has not changed). FIPS + * standards can be found at: http://csrc.nist.gov/publications/fips/ + * + * @warning A theoretical collision attack against SHA-1, with work + * factor 2^63, has been published. 
SHA-1 should not be used in new + * protocol designs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_sha1.h + * @author Thomas Pornin + */ + +#ifndef SPH_SHA1_H__ +#define SPH_SHA1_H__ + +#include +#include "compat/sph_types.h" + +/** + * Output size (in bits) for SHA-1. + */ +#define SPH_SIZE_sha1 160 + +/** + * This structure is a context for SHA-1 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a SHA-1 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running SHA-1 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). 
+ */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[5]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_sha1_context; + +/** + * Initialize a SHA-1 context. This process performs no memory allocation. + * + * @param cc the SHA-1 context (pointer to a sph_sha1_context) + */ +void sph_sha1_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the SHA-1 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha1(void *cc, const void *data, size_t len); + +/** + * Terminate the current SHA-1 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-1 context + * @param dst the destination buffer + */ +void sph_sha1_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (20 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-1 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Apply the SHA-1 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the big-endian decoding). 
The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 160-bit input and output + */ +void sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5]); + +void sph_sha1_full( void *hash, const void *msg, size_t len ); + +#endif diff --git a/algo/shavite/sph-shavite-aesni.c b/algo/shavite/sph-shavite-aesni.c index 61743866..e9f5894a 100644 --- a/algo/shavite/sph-shavite-aesni.c +++ b/algo/shavite/sph-shavite-aesni.c @@ -60,7 +60,6 @@ static const sph_u32 IV512[] = { static void c512( sph_shavite_big_context *sc, const void *msg ) { - const v128_t zero = v128_zero; v128_t p0, p1, p2, p3, x; v128_t k00, k01, k02, k03, k10, k11, k12, k13; v128_t *m = (v128_t*)msg; @@ -76,39 +75,39 @@ c512( sph_shavite_big_context *sc, const void *msg ) k00 = m[0]; x = v128_xor( p1, k00 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k01 = m[1]; x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k02 = m[2]; x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k03 = m[3]; x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p0 = v128_xor( p0, x ); k10 = m[4]; x = v128_xor( p3, k10 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k11 = m[5]; x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k12 = m[6]; x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k13 = m[7]; x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p2 = v128_xor( p2, x ); for ( r = 0; r < 3; r ++ ) { // round 1, 5, 9 - k00 = v128_shuflr32( v128_aesenc( k00, zero ) ); + k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) ); k00 = v128_xor( k00, k13 ); if ( r == 0 ) @@ -116,8 +115,8 @@ c512( sph_shavite_big_context *sc, const void *msg ) ~sc->count3, 
sc->count2, sc->count1, sc->count0 ) ); x = v128_xor( p0, k00 ); - x = v128_aesenc( x, zero ); - k01 = v128_shuflr32( v128_aesenc( k01, zero ) ); + x = v128_aesenc_nokey( x ); + k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) ); k01 = v128_xor( k01, k00 ); if ( r == 1 ) @@ -125,32 +124,32 @@ c512( sph_shavite_big_context *sc, const void *msg ) ~sc->count0, sc->count1, sc->count2, sc->count3 ) ); x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); - k02 = v128_shuflr32( v128_aesenc( k02, zero ) ); + x = v128_aesenc_nokey( x ); + k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) ); k02 = v128_xor( k02, k01 ); x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); - k03 = v128_shuflr32( v128_aesenc( k03, zero ) ); + x = v128_aesenc_nokey( x ); + k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) ); k03 = v128_xor( k03, k02 ); x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p3 = v128_xor( p3, x ); - k10 = v128_shuflr32( v128_aesenc( k10, zero ) ); + k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) ); k10 = v128_xor( k10, k03 ); x = v128_xor( p2, k10 ); - x = v128_aesenc( x, zero ); - k11 = v128_shuflr32( v128_aesenc( k11, zero ) ); + x = v128_aesenc_nokey( x ); + k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) ); k11 = v128_xor( k11, k10 ); x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); - k12 = v128_shuflr32( v128_aesenc( k12, zero ) ); + x = v128_aesenc_nokey( x ); + k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) ); k12 = v128_xor( k12, k11 ); x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); - k13 = v128_shuflr32( v128_aesenc( k13, zero ) ); + x = v128_aesenc_nokey( x ); + k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) ); k13 = v128_xor( k13, k12 ); if ( r == 2 ) @@ -158,78 +157,78 @@ c512( sph_shavite_big_context *sc, const void *msg ) ~sc->count1, sc->count0, sc->count3, sc->count2 ) ); x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p1 = v128_xor( p1, x ); // round 2, 6, 10 k00 = v128_xor( k00, 
v128_alignr8( k13, k12, 4 ) ); x = v128_xor( p3, k00 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) ); x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) ); x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) ); x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p2 = v128_xor( p2, x ); k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) ); x = v128_xor( p1, k10 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) ); x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) ); x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) ); x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p0 = v128_xor( p0, x ); // round 3, 7, 11 - k00 = v128_shuflr32( v128_aesenc( k00, zero ) ); + k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) ); k00 = v128_xor( k00, k13 ); x = v128_xor( p2, k00 ); - x = v128_aesenc( x, zero ); - k01 = v128_shuflr32( v128_aesenc( k01, zero ) ); + x = v128_aesenc_nokey( x ); + k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) ); k01 = v128_xor( k01, k00 ); x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); - k02 = v128_shuflr32( v128_aesenc( k02, zero ) ); + x = v128_aesenc_nokey( x ); + k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) ); k02 = v128_xor( k02, k01 ); x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); - k03 = v128_shuflr32( v128_aesenc( k03, zero ) ); + x = v128_aesenc_nokey( x ); + k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) ); k03 = v128_xor( k03, k02 ); x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p1 = 
v128_xor( p1, x ); - k10 = v128_shuflr32( v128_aesenc( k10, zero ) ); + k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) ); k10 = v128_xor( k10, k03 ); x = v128_xor( p0, k10 ); - x = v128_aesenc( x, zero ); - k11 = v128_shuflr32( v128_aesenc( k11, zero ) ); + x = v128_aesenc_nokey( x ); + k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) ); k11 = v128_xor( k11, k10 ); x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); - k12 = v128_shuflr32( v128_aesenc( k12, zero ) ); + x = v128_aesenc_nokey( x ); + k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) ); k12 = v128_xor( k12, k11 ); x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); - k13 = v128_shuflr32( v128_aesenc( k13, zero ) ); + x = v128_aesenc_nokey( x ); + k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) ); k13 = v128_xor( k13, k12 ); x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p3 = v128_xor( p3, x ); @@ -237,73 +236,73 @@ c512( sph_shavite_big_context *sc, const void *msg ) k00 = v128_xor( k00, v128_alignr8( k13, k12, 4 ) ); x = v128_xor( p1, k00 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) ); x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) ); x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) ); x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p0 = v128_xor( p0, x ); k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) ); x = v128_xor( p3, k10 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) ); x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) ); x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 
) ); x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p2 = v128_xor( p2, x ); } // round 13 - k00 = v128_shuflr32( v128_aesenc( k00, zero ) ); + k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) ); k00 = v128_xor( k00, k13 ); x = v128_xor( p0, k00 ); - x = v128_aesenc( x, zero ); - k01 = v128_shuflr32( v128_aesenc( k01, zero ) ); + x = v128_aesenc_nokey( x ); + k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) ); k01 = v128_xor( k01, k00 ); x = v128_xor( x, k01 ); - x = v128_aesenc( x, zero ); - k02 = v128_shuflr32( v128_aesenc( k02, zero ) ); + x = v128_aesenc_nokey( x ); + k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) ); k02 = v128_xor( k02, k01 ); x = v128_xor( x, k02 ); - x = v128_aesenc( x, zero ); - k03 = v128_shuflr32( v128_aesenc( k03, zero ) ); + x = v128_aesenc_nokey( x ); + k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) ); k03 = v128_xor( k03, k02 ); x = v128_xor( x, k03 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p3 = v128_xor( p3, x ); - k10 = v128_shuflr32( v128_aesenc( k10, zero ) ); + k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) ); k10 = v128_xor( k10, k03 ); x = v128_xor( p2, k10 ); - x = v128_aesenc( x, zero ); - k11 = v128_shuflr32( v128_aesenc( k11, zero ) ); + x = v128_aesenc_nokey( x ); + k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) ); k11 = v128_xor( k11, k10 ); x = v128_xor( x, k11 ); - x = v128_aesenc( x, zero ); - k12 = v128_shuflr32( v128_aesenc( k12, zero ) ); + x = v128_aesenc_nokey( x ); + k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) ); k12 = v128_xor( k12, v128_xor( k11, v128_set32( ~sc->count2, sc->count3, sc->count0, sc->count1 ) ) ); x = v128_xor( x, k12 ); - x = v128_aesenc( x, zero ); - k13 = v128_shuflr32( v128_aesenc( k13, zero ) ); + x = v128_aesenc_nokey( x ); + k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) ); k13 = v128_xor( k13, k12 ); x = v128_xor( x, k13 ); - x = v128_aesenc( x, zero ); + x = v128_aesenc_nokey( x ); p1 = v128_xor( p1, x ); diff --git 
a/algo/simd/simd-hash-2way.c b/algo/simd/simd-hash-2way.c index 7e968d99..5debceca 100644 --- a/algo/simd/simd-hash-2way.c +++ b/algo/simd/simd-hash-2way.c @@ -12,23 +12,8 @@ uint32_t SIMD_IV_512[] __attribute__((aligned(64))) = 0x7eef60a1, 0x6b70e3e8, 0x9c1714d1, 0xb958e2a8, 0xab02675e, 0xed1c014f, 0xcd8d65bb, 0xfdb7a257, 0x09254899, 0xd699c7bc, 0x9019b6dc, 0x2b9022e4, - 0x8fa14956, 0x21bf9bd3, 0xb94d0943, 0x6ffddc22 }; - -#if defined(__x86_64__) - -#define SHUFXOR_1 0xb1 // rev64_32 -#define SHUFXOR_2 0x4e // rev64 -#define SHUFXOR_3 0x1b // rev32 - -#elif defined(__aarch64__) - -#define SHUFXOR_1(x) vrev64q_u32(x) -#define SHUFXOR_2(x) v128_rev64(x) -#define SHUFXOR_3(x) v128_rev64( v128_qrev32(x) ) - -#else - -#endif + 0x8fa14956, 0x21bf9bd3, 0xb94d0943, 0x6ffddc22 +}; #define CAT(x, y) x##y #define XCAT(x,y) CAT(x,y) @@ -89,8 +74,8 @@ uint32_t SIMD_IV_512[] __attribute__((aligned(64))) = #define SUM7_65 4 #define SUM7_66 5 -#define PERM( z, d, a, shufxor ) \ - XCAT( PERM_, XCAT( SUM7_ ## z, PERM_START ) )( d, a, shufxor ) +#define PERM( p, z, d, a, shufxor ) \ + XCAT( PERM_, XCAT( SUM7_ ## z, p ) )( d, a, shufxor ) #define PERM_0( d, a, shufxor ) /* XOR 1 */ \ do { \ @@ -188,16 +173,22 @@ static const m128_v16 FFT256_twiddle[] __attribute__((aligned(64))) = #if defined(__x86_64__) -#define shufxor(x,s) _mm_shuffle_epi32( x, XCAT( SHUFXOR_, s )) +#define SHUFXOR_1(x) _mm_shuffle_epi32(x,0xb1) +#define SHUFXOR_2(x) _mm_shuffle_epi32(x,0x4e) +#define SHUFXOR_3(x) _mm_shuffle_epi32(x,0x1b) #elif defined(__aarch64__) -#define shufxor(x,s) XCAT( SHUFXOR_, s )(x) +#define SHUFXOR_1(x) vrev64q_u32(x) +#define SHUFXOR_2(x) v128_rev64(x) +#define SHUFXOR_3(x) v128_rev64(v128_qrev32(x)) #else -//#warning __FILE__ "Unknown or unsupported CPU architecture" +//unknown or unsupported architecture #endif +#define shufxor(x,s) XCAT(SHUFXOR_,s)(x) + #define REDUCE(x) \ v128_sub16( v128_and( x, v128_64( \ 0x00ff00ff00ff00ff ) ), v128_sra16( x, 8 ) ) @@ -513,7 +504,7 @@ static 
void ROUNDS512( uint32_t *state, const uint8_t *msg, uint16_t *fft ) #define Fl(a,b,c,fun) F_##fun (a##l,b##l,c##l) #define Fh(a,b,c,fun) F_##fun (a##h,b##h,c##h) -#define STEP_1_(a,b,c,d,w,fun,r,s,z) \ +#define STEP_1_( a,b,c,d,w,fun,r,s,z,p ) \ do { \ TTl = Fl( a,b,c,fun ); \ TTh = Fh( a,b,c,fun ); \ @@ -525,10 +516,10 @@ do { \ TTh = v128_add32( TTh, w##h ); \ TTl = v128_rol32( TTl, s ); \ TTh = v128_rol32( TTh, s ); \ - PERM( z,d,a, shufxor ); \ + PERM( p, z,d,a, shufxor ); \ } while(0) -#define STEP_1( a,b,c,d,w,fun,r,s,z ) STEP_1_( a,b,c,d,w,fun,r,s,z ) +#define STEP_1( a,b,c,d,w,fun,r,s,z,p ) STEP_1_( a,b,c,d,w,fun,r,s,z,p ) #define STEP_2_( a,b,c,d,w,fun,r,s ) \ do { \ @@ -538,10 +529,10 @@ do { \ #define STEP_2( a,b,c,d,w,fun,r,s ) STEP_2_( a,b,c,d,w,fun,r,s ) -#define STEP( a,b,c,d,w1,w2,fun,r,s,z ) \ +#define STEP( a,b,c,d,w1,w2,fun,r,s,z,p ) \ do { \ register v128u32_t TTl, TTh, Wl=w1, Wh=w2; \ - STEP_1( a,b,c,d,W,fun,r,s,z ); \ + STEP_1( a,b,c,d,W,fun,r,s,z,p ); \ STEP_2( a,b,c,d,W,fun,r,s ); \ } while(0); @@ -558,63 +549,45 @@ do { \ w##h = v128_mul16( w##h, code[z].v128 ); \ } while(0) -#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z ) \ +#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z,p ) \ do { \ register v128u32_t W0l, W1l, W2l, W3l, TTl; \ register v128u32_t W0h, W1h, W2h, W3h, TTh; \ MSG( W0, h0, l0, u0, z ); \ - STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0 ); \ + STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0, p ); \ MSG( W1, h1, l1, u1, z ); \ STEP_2( S(0), S(1), S(2), S(3), W0, fun, r, s ); \ - STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1 ); \ + STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1, p ); \ MSG( W2,h2,l2,u2,z ); \ STEP_2( S(3), S(0), S(1), S(2), W1, fun, s, t ); \ - STEP_1( S(2), S(3), S(0), S(1), W2, fun, t, u, 2 ); \ + STEP_1( S(2), S(3), S(0), S(1), W2, fun, t, u, 2, p ); \ MSG( W3,h3,l3,u3,z ); \ STEP_2( S(2), S(3), S(0), S(1), W2, fun, t, u ); \ - STEP_1( S(1), S(2), S(3), S(0), W3, 
fun, u, r, 3 ); \ + STEP_1( S(1), S(2), S(3), S(0), W3, fun, u, r, 3, p ); \ STEP_2( S(1), S(2), S(3), S(0), W3, fun, u, r ); \ } while(0) // 4 rounds with code 185 -#define PERM_START 0 - ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 4 - ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 1 - ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0); -#undef PERM_START -#define PERM_START 5 - ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0); -#undef PERM_START + ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0, 0); + ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0, 4); + ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0, 1); + ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0, 5); // 4 rounds with code 233 -#define PERM_START 2 - ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 6 - ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 3 - ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1); -#undef PERM_START -#define PERM_START 0 - ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1); -#undef PERM_START + ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1, 2); + ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1, 6); + ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1, 3); + ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1, 0); // 1 round as feed-forward -#define PERM_START 4 - STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0 ); - STEP( S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1 ); - STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2 ); - STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3 ); + STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 
0, 4 ); + STEP( S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1, 4 ); + STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2, 4 ); + STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3, 4 ); S[0] = S0l; S[1] = S0h; S[2] = S1l; S[3] = S1h; S[4] = S2l; S[5] = S2h; S[6] = S3l; S[7] = S3h; -#undef PERM_START #undef STEP_1 #undef STEP_1_ #undef STEP_2 @@ -732,6 +705,9 @@ int simd512( void *hashval, const void *data, int datalen ) #undef REDUCE_FULL_S #undef DO_REDUCE_FULL_S #undef c1_16 +#undef SHUFXOR_1 +#undef SHUFXOR_2 +#undef SHUFXOR_3 #endif @@ -820,118 +796,12 @@ static const m256_v16 FFT256_Twiddle[] = -30, 55, -58, -65, -95, -40, -98, 94 }} }; -#if 0 -// generic -#define SHUFXOR_1 0xb1 // 0b10110001 -#define SHUFXOR_2 0x4e // 0b01001110 -#define SHUFXOR_3 0x1b // 0b00011011 - -#define CAT(x, y) x##y -#define XCAT(x,y) CAT(x,y) - -#define SUM7_00 0 -#define SUM7_01 1 -#define SUM7_02 2 -#define SUM7_03 3 -#define SUM7_04 4 -#define SUM7_05 5 -#define SUM7_06 6 - -#define SUM7_10 1 -#define SUM7_11 2 -#define SUM7_12 3 -#define SUM7_13 4 -#define SUM7_14 5 -#define SUM7_15 6 -#define SUM7_16 0 - -#define SUM7_20 2 -#define SUM7_21 3 -#define SUM7_22 4 -#define SUM7_23 5 -#define SUM7_24 6 -#define SUM7_25 0 -#define SUM7_26 1 - -#define SUM7_30 3 -#define SUM7_31 4 -#define SUM7_32 5 -#define SUM7_33 6 -#define SUM7_34 0 -#define SUM7_35 1 -#define SUM7_36 2 - -#define SUM7_40 4 -#define SUM7_41 5 -#define SUM7_42 6 -#define SUM7_43 0 -#define SUM7_44 1 -#define SUM7_45 2 -#define SUM7_46 3 - -#define SUM7_50 5 -#define SUM7_51 6 -#define SUM7_52 0 -#define SUM7_53 1 -#define SUM7_54 2 -#define SUM7_55 3 -#define SUM7_56 4 - -#define SUM7_60 6 -#define SUM7_61 0 -#define SUM7_62 1 -#define SUM7_63 2 -#define SUM7_64 3 -#define SUM7_65 4 -#define SUM7_66 5 - - -#define PERM(z,d,a,shufxor) XCAT(PERM_,XCAT(SUM7_##z,PERM_START))(d,a,shufxor) - -#define PERM_0(d,a,shufxor) /* XOR 1 */ \ -do { \ - d##l = shufxor( a##l, 1 ); \ - d##h = shufxor( a##h, 1 ); \ - } 
while(0) - -#define PERM_1(d,a,shufxor) /* XOR 6 */ \ -do { \ - d##l = shufxor( a##h, 2 ); \ - d##h = shufxor( a##l, 2 ); \ -} while(0) - -#define PERM_2(d,a,shufxor) /* XOR 2 */ \ -do { \ - d##l = shufxor( a##l, 2 ); \ - d##h = shufxor( a##h, 2 ); \ -} while(0) - -#define PERM_3(d,a,shufxor) /* XOR 3 */ \ -do { \ - d##l = shufxor( a##l, 3 ); \ - d##h = shufxor( a##h, 3 ); \ -} while(0) - -#define PERM_4(d,a,shufxor) /* XOR 5 */ \ -do { \ - d##l = shufxor( a##h, 1 ); \ - d##h = shufxor( a##l, 1 ); \ -} while(0) - -#define PERM_5(d,a,shufxor) /* XOR 7 */ \ -do { \ - d##l = shufxor( a##h, 3 ); \ - d##h = shufxor( a##l, 3 ); \ -} while(0) -#define PERM_6(d,a,shufxor) /* XOR 4 */ \ -do { \ - d##l = a##h; \ - d##h = a##l; \ -} while(0) -#endif +#define SHUFXOR_1(x) _mm256_shuffle_epi32(x,0xb1) +#define SHUFXOR_2(x) _mm256_shuffle_epi32(x,0x4e) +#define SHUFXOR_3(x) _mm256_shuffle_epi32(x,0x1b) -#define shufxor2w(x,s) _mm256_shuffle_epi32( x, XCAT( SHUFXOR_, s )) +#define shufxor2w(x,s) XCAT(SHUFXOR_,s)(x) #if defined(__AVX512VL__) //TODO Enable for AVX10_256 @@ -1262,7 +1132,7 @@ static void rounds512_2way( uint32_t *state, const uint8_t *msg, uint16_t *fft ) #define Fl(a,b,c,fun) F_##fun (a##l,b##l,c##l) #define Fh(a,b,c,fun) F_##fun (a##h,b##h,c##h) -#define STEP_1_(a,b,c,d,w,fun,r,s,z) \ +#define STEP_1_(a,b,c,d,w,fun,r,s,z,p ) \ do { \ TTl = Fl( a,b,c,fun ); \ TTh = Fh( a,b,c,fun ); \ @@ -1274,10 +1144,10 @@ do { \ TTh = _mm256_add_epi32( TTh, w##h ); \ TTl = mm256_rol_32( TTl, s ); \ TTh = mm256_rol_32( TTh, s ); \ - PERM( z,d,a, shufxor2w ); \ + PERM( p,z,d,a, shufxor2w ); \ } while(0) -#define STEP_1( a,b,c,d,w,fun,r,s,z ) STEP_1_( a,b,c,d,w,fun,r,s,z ) +#define STEP_1( a,b,c,d,w,fun,r,s,z,p ) STEP_1_( a,b,c,d,w,fun,r,s,z,p ) #define STEP_2_( a,b,c,d,w,fun,r,s ) \ do { \ @@ -1287,10 +1157,10 @@ do { \ #define STEP_2( a,b,c,d,w,fun,r,s ) STEP_2_( a,b,c,d,w,fun,r,s ) -#define STEP( a,b,c,d,w1,w2,fun,r,s,z ) \ +#define STEP( a,b,c,d,w1,w2,fun,r,s,z, p ) \ do { \ 
register __m256i TTl, TTh, Wl=w1, Wh=w2; \ - STEP_1( a,b,c,d,W,fun,r,s,z ); \ + STEP_1( a,b,c,d,W,fun,r,s,z,p ); \ STEP_2( a,b,c,d,W,fun,r,s ); \ } while(0); @@ -1307,63 +1177,45 @@ do { \ w##h = _mm256_mullo_epi16( w##h, code[z].v256 ); \ } while(0) -#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z ) \ +#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z,p ) \ do { \ register __m256i W0l, W1l, W2l, W3l, TTl; \ register __m256i W0h, W1h, W2h, W3h, TTh; \ MSG( W0, h0, l0, u0, z ); \ - STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0 ); \ + STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0, p ); \ MSG( W1, h1, l1, u1, z ); \ STEP_2( S(0), S(1), S(2), S(3), W0, fun, r, s ); \ - STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1 ); \ + STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1, p ); \ MSG( W2,h2,l2,u2,z ); \ STEP_2( S(3), S(0), S(1), S(2), W1, fun, s, t ); \ - STEP_1( S(2), S(3), S(0), S(1), W2, fun, t, u, 2 ); \ + STEP_1( S(2), S(3), S(0), S(1), W2, fun, t, u, 2, p ); \ MSG( W3,h3,l3,u3,z ); \ STEP_2( S(2), S(3), S(0), S(1), W2, fun, t, u ); \ - STEP_1( S(1), S(2), S(3), S(0), W3, fun, u, r, 3 ); \ + STEP_1( S(1), S(2), S(3), S(0), W3, fun, u, r, 3, p ); \ STEP_2( S(1), S(2), S(3), S(0), W3, fun, u, r ); \ } while(0) // 4 rounds with code 185 -#define PERM_START 0 - ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 4 - ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 1 - ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0); -#undef PERM_START -#define PERM_START 5 - ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0); -#undef PERM_START + ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0, 0); + ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0, 4); + ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0, 1); + ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 
15, l, 1, 28, 19, 22, 7, 0, 5); // 4 rounds with code 233 -#define PERM_START 2 - ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 6 - ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 3 - ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1); -#undef PERM_START -#define PERM_START 0 - ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1); -#undef PERM_START + ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1, 2); + ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1, 6); + ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1, 3); + ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1, 0); // 1 round as feed-forward -#define PERM_START 4 - STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0 ); - STEP( S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1 ); - STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2 ); - STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3 ); - + STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0, 4 ); + STEP( S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1, 4 ); + STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2, 4 ); + STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3, 4 ); + S[0] = S0l; S[1] = S0h; S[2] = S1l; S[3] = S1h; S[4] = S2l; S[5] = S2h; S[6] = S3l; S[7] = S3h; -#undef PERM_START #undef STEP_1 #undef STEP_1_ #undef STEP_2 @@ -1642,6 +1494,10 @@ int simd512_2way( void *hashval, const void *data, int datalen ) return 0; } +#undef SHUFXOR_1 +#undef SHUFXOR_2 +#undef SHUFXOR_3 + #endif #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) @@ -1792,7 +1648,11 @@ static const m512_v16 FFT256_Twiddle4w[] = -30, 55, -58, -65, -95, -40, -98, 94 }} }; -#define shufxor4w(x,s) _mm512_shuffle_epi32( x, XCAT( SHUFXOR_, s )) +#define SHUFXOR_1(x) _mm512_shuffle_epi32(x,0xb1) +#define 
SHUFXOR_2(x) _mm512_shuffle_epi32(x,0x4e) +#define SHUFXOR_3(x) _mm512_shuffle_epi32(x,0x1b) + +#define shufxor4w(x,s) XCAT(SHUFXOR_,s)(x) #define REDUCE4w(x) \ _mm512_sub_epi16( _mm512_maskz_mov_epi8( 0x5555555555555555, x ), \ @@ -2114,7 +1974,7 @@ static void rounds512_4way( uint32_t *state, const uint8_t *msg, uint16_t *fft ) // targetted -#define STEP_1_(a,b,c,d,w,fun,r,s,z) \ +#define STEP_1_( a,b,c,d,w,fun,r,s,z,p ) \ do { \ TTl = Fl( a,b,c,fun ); \ TTh = Fh( a,b,c,fun ); \ @@ -2126,10 +1986,10 @@ do { \ TTh = _mm512_add_epi32( TTh, w##h ); \ TTl = mm512_rol_32( TTl, s ); \ TTh = mm512_rol_32( TTh, s ); \ - PERM( z,d,a, shufxor4w ); \ + PERM( p,z,d,a, shufxor4w ); \ } while(0) -#define STEP_1( a,b,c,d,w,fun,r,s,z ) STEP_1_( a,b,c,d,w,fun,r,s,z ) +#define STEP_1( a,b,c,d,w,fun,r,s,z,p ) STEP_1_( a,b,c,d,w,fun,r,s,z,p ) #define STEP_2_( a,b,c,d,w,fun,r,s ) \ do { \ @@ -2139,10 +1999,10 @@ do { \ #define STEP_2( a,b,c,d,w,fun,r,s ) STEP_2_( a,b,c,d,w,fun,r,s ) -#define STEP( a,b,c,d,w1,w2,fun,r,s,z ) \ +#define STEP( a,b,c,d,w1,w2,fun,r,s,z,p ) \ do { \ register __m512i TTl, TTh, Wl=w1, Wh=w2; \ - STEP_1( a,b,c,d,W,fun,r,s,z ); \ + STEP_1( a,b,c,d,W,fun,r,s,z,p ); \ STEP_2( a,b,c,d,W,fun,r,s ); \ } while(0); @@ -2159,63 +2019,45 @@ do { \ w##h = _mm512_mullo_epi16( w##h, code[z].v512 ); \ } while(0) -#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z ) \ +#define ROUND( h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3,fun,r,s,t,u,z,p ) \ do { \ register __m512i W0l, W1l, W2l, W3l, TTl; \ register __m512i W0h, W1h, W2h, W3h, TTh; \ MSG( W0, h0, l0, u0, z ); \ - STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0 ); \ + STEP_1( S(0), S(1), S(2), S(3), W0, fun, r, s, 0,p ); \ MSG( W1, h1, l1, u1, z ); \ STEP_2( S(0), S(1), S(2), S(3), W0, fun, r, s ); \ - STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1 ); \ + STEP_1( S(3), S(0), S(1), S(2), W1, fun, s, t, 1,p ); \ MSG( W2,h2,l2,u2,z ); \ STEP_2( S(3), S(0), S(1), S(2), W1, fun, s, t ); \ - STEP_1( S(2), S(3), S(0), 
S(1), W2, fun, t, u, 2 ); \ + STEP_1( S(2), S(3), S(0), S(1), W2, fun, t, u, 2,p ); \ MSG( W3,h3,l3,u3,z ); \ STEP_2( S(2), S(3), S(0), S(1), W2, fun, t, u ); \ - STEP_1( S(1), S(2), S(3), S(0), W3, fun, u, r, 3 ); \ + STEP_1( S(1), S(2), S(3), S(0), W3, fun, u, r, 3,p ); \ STEP_2( S(1), S(2), S(3), S(0), W3, fun, u, r ); \ } while(0) // 4 rounds with code 185 -#define PERM_START 0 - ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 4 - ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0); -#undef PERM_START -#define PERM_START 1 - ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0); -#undef PERM_START -#define PERM_START 5 - ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0); -#undef PERM_START + ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0, 0); + ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0, 4); + ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0, 1); + ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0, 5); // 4 rounds with code 233 -#define PERM_START 2 - ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 6 - ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1); -#undef PERM_START -#define PERM_START 3 - ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1); -#undef PERM_START -#define PERM_START 0 - ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1); -#undef PERM_START + ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1, 2); + ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1, 6); + ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1, 3); + ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1, 0); // 1 round as feed-forward -#define PERM_START 4 - STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0 ); - STEP( S(3), S(0), S(1), S(2), 
S[2], S[3], 0, 13, 10, 1 ); - STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2 ); - STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3 ); - + STEP( S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0, 4 ); + STEP( S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1, 4 ); + STEP( S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2, 4 ); + STEP( S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3, 4 ); + S[0] = S0l; S[1] = S0h; S[2] = S1l; S[3] = S1h; S[4] = S2l; S[5] = S2h; S[6] = S3l; S[7] = S3h; -#undef PERM_START #undef STEP_1 #undef STEP_1_ #undef STEP_2 diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 61d00ab8..75626f2f 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -951,7 +951,7 @@ union _x17_context_overlay #else sph_groestl512_context groestl; #endif -#if defined(__AES__) // || defined(__ARM_FEATURE_AES) +#if defined(__AES__) || defined(__ARM_FEATURE_AES) hashState_echo echo; #else sph_echo512_context echo; @@ -1045,7 +1045,7 @@ int x17_2x64_hash( void *output, const void *input, int thr_id ) sph_simd512_close( &ctx.simd, hash1 ); #endif -#if defined(__AES__) // || defined(__ARM_FEATURE_AES) +#if defined(__AES__) || defined(__ARM_FEATURE_AES) echo_full( &ctx.echo, hash0, 512, hash0, 64 ); echo_full( &ctx.echo, hash1, 512, hash1, 64 ); #else diff --git a/api.c b/api.c index 56035b02..3098701f 100644 --- a/api.c +++ b/api.c @@ -8,6 +8,7 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. See COPYING for more details. 
*/ + #define APIVERSION "1.0" #ifdef WIN32 @@ -27,9 +28,9 @@ #include #include #include -#include #include #include +#include "algo/sha/sha1-hash.h" #include "miner.h" #include "sysinfos.c" @@ -208,7 +209,7 @@ static char *remote_seturl(char *params) return buffer; } -/** +/*-hash* * Ask the miner to quit */ static char *remote_quit(char *params) @@ -336,7 +337,6 @@ static int websocket_handshake(SOCKETTYPE c, char *result, char *clientkey) char inpkey[128] = { 0 }; char seckey[64]; uchar sha1[20]; -// SHA_CTX ctx; if (opt_protocol) applog(LOG_DEBUG, "clientkey: %s", clientkey); @@ -346,11 +346,7 @@ static int websocket_handshake(SOCKETTYPE c, char *result, char *clientkey) // SHA-1 test from rfc, returns in base64 "s3pPLMBiTxaQ9kYGzzhZRbK+xOo=" //sprintf(inpkey, "dGhlIHNhbXBsZSBub25jZQ==258EAFA5-E914-47DA-95CA-C5AB0DC85B11"); - SHA1( inpkey, strlen(inpkey), sha1 ); -// Deprecated in openssl-3 -// SHA1_Init(&ctx); -// SHA1_Update(&ctx, inpkey, strlen(inpkey)); -// SHA1_Final(sha1, &ctx); + sph_sha1_full( sha1, inpkey, strlen(inpkey) ); base64_encode(sha1, 20, seckey, sizeof(seckey)); @@ -733,3 +729,4 @@ void *api_thread(void *userdata) return NULL; } + diff --git a/armbuild-all.sh b/armbuild-all.sh index 05d9b718..328cb33b 100755 --- a/armbuild-all.sh +++ b/armbuild-all.sh @@ -40,4 +40,3 @@ rm -f config.status CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl make -j $nproc strip -s cpuminer -mv cpuminer cpuminer diff --git a/asm/scrypt-arm.S b/asm/scrypt-arm.S deleted file mode 100644 index 437bcc33..00000000 --- a/asm/scrypt-arm.S +++ /dev/null @@ -1,1186 +0,0 @@ -/* - * Copyright 2012, 2014 pooler@litecoinpool.org - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#include - -#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__) - -#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) -#define __ARM_ARCH_5E_OR_6__ -#endif - -#if defined(__ARM_ARCH_5E_OR_6__) || defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) -#define __ARM_ARCH_5E_OR_6_OR_7__ -#endif - -#ifdef __ARM_ARCH_5E_OR_6__ - -.macro scrypt_shuffle - add lr, r0, #9*4 - ldmia r0, {r2-r7} - ldmia lr, {r2, r8-r12, lr} - str r3, [r0, #5*4] - str r5, [r0, #15*4] - str r6, [r0, #12*4] - str r7, [r0, #1*4] - ldr r5, [r0, #7*4] - str r2, [r0, #13*4] - str r8, [r0, #2*4] - strd r4, [r0, #10*4] - str r9, [r0, #7*4] - str r10, [r0, #4*4] - str r11, [r0, #9*4] - str lr, [r0, #3*4] - - add r2, r0, #64+0*4 - add lr, r0, #64+9*4 - ldmia r2, {r2-r7} - ldmia lr, {r2, r8-r12, lr} - str r3, [r0, #64+5*4] - str r5, [r0, #64+15*4] - str r6, [r0, #64+12*4] - str r7, [r0, #64+1*4] - ldr r5, [r0, #64+7*4] - str r2, [r0, #64+13*4] - str r8, [r0, #64+2*4] - strd r4, [r0, #64+10*4] - str r9, [r0, #64+7*4] - str r10, [r0, #64+4*4] - str r11, [r0, #64+9*4] - str lr, [r0, #64+3*4] -.endm - -.macro salsa8_core_doubleround_body - add r6, r2, r6 - add r7, r3, r7 - eor r10, r10, r6, ror #25 - add r6, r0, r4 - eor r11, r11, r7, ror #25 - add r7, r1, r5 - strd r10, [sp, #14*4] - eor r12, r12, r6, ror #25 - eor lr, lr, r7, ror #25 - - ldrd r6, [sp, #10*4] - add r2, r10, r2 - add r3, r11, r3 - eor r6, r6, r2, ror #23 - add r2, r12, r0 - eor r7, r7, r3, ror #23 - add r3, lr, r1 - strd r6, [sp, #10*4] - eor r8, r8, r2, ror #23 - eor r9, r9, r3, ror #23 - - ldrd r2, [sp, #6*4] - add r10, r6, r10 - add r11, r7, r11 - eor r2, r2, r10, ror #19 - add r10, 
r8, r12 - eor r3, r3, r11, ror #19 - add r11, r9, lr - eor r4, r4, r10, ror #19 - eor r5, r5, r11, ror #19 - - ldrd r10, [sp, #2*4] - add r6, r2, r6 - add r7, r3, r7 - eor r10, r10, r6, ror #14 - add r6, r4, r8 - eor r11, r11, r7, ror #14 - add r7, r5, r9 - eor r0, r0, r6, ror #14 - eor r1, r1, r7, ror #14 - - - ldrd r6, [sp, #14*4] - strd r2, [sp, #6*4] - strd r10, [sp, #2*4] - add r6, r11, r6 - add r7, r0, r7 - eor r4, r4, r6, ror #25 - add r6, r1, r12 - eor r5, r5, r7, ror #25 - add r7, r10, lr - eor r2, r2, r6, ror #25 - eor r3, r3, r7, ror #25 - strd r2, [sp, #6*4] - - add r10, r3, r10 - ldrd r6, [sp, #10*4] - add r11, r4, r11 - eor r8, r8, r10, ror #23 - add r10, r5, r0 - eor r9, r9, r11, ror #23 - add r11, r2, r1 - eor r6, r6, r10, ror #23 - eor r7, r7, r11, ror #23 - strd r6, [sp, #10*4] - - add r2, r7, r2 - ldrd r10, [sp, #14*4] - add r3, r8, r3 - eor r12, r12, r2, ror #19 - add r2, r9, r4 - eor lr, lr, r3, ror #19 - add r3, r6, r5 - eor r10, r10, r2, ror #19 - eor r11, r11, r3, ror #19 - - ldrd r2, [sp, #2*4] - add r6, r11, r6 - add r7, r12, r7 - eor r0, r0, r6, ror #14 - add r6, lr, r8 - eor r1, r1, r7, ror #14 - add r7, r10, r9 - eor r2, r2, r6, ror #14 - eor r3, r3, r7, ror #14 -.endm - -.macro salsa8_core - ldmia sp, {r0-r12, lr} - - ldrd r10, [sp, #14*4] - salsa8_core_doubleround_body - ldrd r6, [sp, #6*4] - strd r2, [sp, #2*4] - strd r10, [sp, #14*4] - salsa8_core_doubleround_body - ldrd r6, [sp, #6*4] - strd r2, [sp, #2*4] - strd r10, [sp, #14*4] - salsa8_core_doubleround_body - ldrd r6, [sp, #6*4] - strd r2, [sp, #2*4] - strd r10, [sp, #14*4] - salsa8_core_doubleround_body - - stmia sp, {r0-r5} - strd r8, [sp, #8*4] - str r12, [sp, #12*4] - str lr, [sp, #13*4] - strd r10, [sp, #14*4] -.endm - -#else - -.macro scrypt_shuffle -.endm - -.macro salsa8_core_doubleround_body - ldr r8, [sp, #8*4] - add r11, r11, r10 - ldr lr, [sp, #13*4] - add r12, r12, r3 - eor r2, r2, r11, ror #23 - add r11, r4, r0 - eor r7, r7, r12, ror #23 - add r12, r9, r5 - str r9, 
[sp, #9*4] - eor r8, r8, r11, ror #23 - str r10, [sp, #14*4] - eor lr, lr, r12, ror #23 - - ldr r11, [sp, #11*4] - add r9, lr, r9 - ldr r12, [sp, #12*4] - add r10, r2, r10 - eor r1, r1, r9, ror #19 - add r9, r7, r3 - eor r6, r6, r10, ror #19 - add r10, r8, r4 - str r8, [sp, #8*4] - eor r11, r11, r9, ror #19 - str lr, [sp, #13*4] - eor r12, r12, r10, ror #19 - - ldr r9, [sp, #10*4] - add r8, r12, r8 - ldr r10, [sp, #15*4] - add lr, r1, lr - eor r0, r0, r8, ror #14 - add r8, r6, r2 - eor r5, r5, lr, ror #14 - add lr, r11, r7 - eor r9, r9, r8, ror #14 - ldr r8, [sp, #9*4] - eor r10, r10, lr, ror #14 - ldr lr, [sp, #14*4] - - - add r8, r9, r8 - str r9, [sp, #10*4] - add lr, r10, lr - str r10, [sp, #15*4] - eor r11, r11, r8, ror #25 - add r8, r0, r3 - eor r12, r12, lr, ror #25 - add lr, r5, r4 - eor r1, r1, r8, ror #25 - ldr r8, [sp, #8*4] - eor r6, r6, lr, ror #25 - - add r9, r11, r9 - ldr lr, [sp, #13*4] - add r10, r12, r10 - eor r8, r8, r9, ror #23 - add r9, r1, r0 - eor lr, lr, r10, ror #23 - add r10, r6, r5 - str r11, [sp, #11*4] - eor r2, r2, r9, ror #23 - str r12, [sp, #12*4] - eor r7, r7, r10, ror #23 - - ldr r9, [sp, #9*4] - add r11, r8, r11 - ldr r10, [sp, #14*4] - add r12, lr, r12 - eor r9, r9, r11, ror #19 - add r11, r2, r1 - eor r10, r10, r12, ror #19 - add r12, r7, r6 - str r8, [sp, #8*4] - eor r3, r3, r11, ror #19 - str lr, [sp, #13*4] - eor r4, r4, r12, ror #19 -.endm - -.macro salsa8_core - ldmia sp, {r0-r7} - - ldr r12, [sp, #15*4] - ldr r8, [sp, #11*4] - ldr lr, [sp, #12*4] - - ldr r9, [sp, #9*4] - add r8, r8, r12 - ldr r11, [sp, #10*4] - add lr, lr, r0 - eor r3, r3, r8, ror #25 - add r8, r5, r1 - ldr r10, [sp, #14*4] - eor r4, r4, lr, ror #25 - add lr, r11, r6 - eor r9, r9, r8, ror #25 - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - add r8, r3, r2 - eor r12, r12, lr, ror #14 - add lr, r4, r7 - eor r0, r0, r8, ror #14 - ldr r8, 
[sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, #10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - str r9, [sp, #9*4] - eor r9, r9, r8, ror #25 - str r10, [sp, #14*4] - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - add r8, r3, r2 - eor r12, r12, lr, ror #14 - add lr, r4, r7 - eor r0, r0, r8, ror #14 - ldr r8, [sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, #10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - str r9, [sp, #9*4] - eor r9, r9, r8, ror #25 - str r10, [sp, #14*4] - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - add r8, r3, r2 - eor r12, r12, lr, ror #14 - add lr, r4, r7 - eor r0, r0, r8, ror #14 - ldr r8, [sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, #10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - str r9, [sp, #9*4] - eor r9, r9, r8, ror #25 - str r10, [sp, #14*4] - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - str r9, [sp, #9*4] - eor r11, r11, r8, ror #14 - eor r12, r12, lr, ror #14 - add r8, r3, r2 - str r10, [sp, #14*4] - add lr, r4, r7 - str r11, [sp, #10*4] - eor r0, r0, r8, ror #14 - str r12, [sp, #15*4] - eor r5, r5, lr, ror #14 - - stmia sp, {r0-r7} -.endm - -#endif - - -.macro scrypt_core_macro1a_x4 - ldmia r0, {r4-r7} - ldmia lr!, {r8-r11} - stmia r1!, {r4-r7} - stmia r3!, {r8-r11} - eor r4, r4, r8 - eor 
r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r0!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro1b_x4 - ldmia r3!, {r8-r11} - ldmia r2, {r4-r7} - eor r8, r8, r4 - eor r9, r9, r5 - eor r10, r10, r6 - eor r11, r11, r7 - ldmia r0, {r4-r7} - stmia r2!, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - ldmia r1!, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r0!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro2_x4 - ldmia r12, {r4-r7} - ldmia r0, {r8-r11} - add r4, r4, r8 - add r5, r5, r9 - add r6, r6, r10 - add r7, r7, r11 - stmia r0!, {r4-r7} - ldmia r2, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r2!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro3_x4 - ldmia r1!, {r4-r7} - ldmia r0, {r8-r11} - add r4, r4, r8 - add r5, r5, r9 - add r6, r6, r10 - add r7, r7, r11 - stmia r0!, {r4-r7} -.endm - -.macro scrypt_core_macro3_x6 - ldmia r1!, {r2-r7} - ldmia r0, {r8-r12, lr} - add r2, r2, r8 - add r3, r3, r9 - add r4, r4, r10 - add r5, r5, r11 - add r6, r6, r12 - add r7, r7, lr - stmia r0!, {r2-r7} -.endm - - - .text - .code 32 - .align 2 - .globl scrypt_core - .globl _scrypt_core -#ifdef __ELF__ - .type scrypt_core, %function -#endif -scrypt_core: -_scrypt_core: - stmfd sp!, {r4-r11, lr} - mov r12, sp - sub sp, sp, #22*4 - bic sp, sp, #63 - str r12, [sp, #20*4] - str r2, [sp, #21*4] - - scrypt_shuffle - - ldr r2, [sp, #21*4] - str r0, [sp, #16*4] - add r12, r1, r2, lsl #7 - str r12, [sp, #18*4] -scrypt_core_loop1: - add lr, r0, #16*4 - add r3, r1, #16*4 - mov r12, sp - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - str r1, [sp, #17*4] - - salsa8_core - - ldr r0, [sp, #16*4] - mov r12, sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov 
r1, sp - add r0, r0, #16*4 - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - ldr r3, [sp, #17*4] - ldr r12, [sp, #18*4] - scrypt_core_macro3_x4 - - add r1, r3, #16*4 - sub r0, r0, #32*4 - cmp r1, r12 - bne scrypt_core_loop1 - - ldr r12, [sp, #21*4] - ldr r4, [r0, #16*4] - sub r2, r12, #1 - str r2, [sp, #21*4] - sub r1, r1, r12, lsl #7 - str r1, [sp, #17*4] - and r4, r4, r2 - add r1, r1, r4, lsl #7 -scrypt_core_loop2: - add r2, r0, #16*4 - add r3, r1, #16*4 - str r12, [sp, #18*4] - mov r12, sp -#ifdef __ARM_ARCH_5E_OR_6_OR_7__ - pld [r1, #24*4] - pld [r1, #8*4] -#endif - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov r12, sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov r1, sp - ldr r3, [sp, #17*4] - add r0, r0, #16*4 - ldr r2, [sp, #21*4] - scrypt_core_macro3_x4 - and r4, r4, r2 - add r3, r3, r4, lsl #7 - str r3, [sp, #19*4] -#ifdef __ARM_ARCH_5E_OR_6_OR_7__ - pld [r3, #16*4] - pld [r3] -#endif - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - - ldr r12, [sp, #18*4] - sub r0, r0, #32*4 - ldr r1, [sp, #19*4] - subs r12, r12, #1 - bne scrypt_core_loop2 - - scrypt_shuffle - - ldr sp, [sp, #20*4] -#ifdef __thumb__ - ldmfd sp!, {r4-r11, lr} - bx lr -#else - ldmfd sp!, {r4-r11, pc} -#endif - - -#ifdef __ARM_NEON__ - -.macro salsa8_core_3way_doubleround - ldrd r6, [sp, #6*4] - vadd.u32 q4, q0, q1 - add r6, r2, r6 - vadd.u32 q6, q8, q9 - add r7, r3, r7 - vshl.u32 q5, q4, #7 - eor r10, r10, r6, ror #25 - vshl.u32 q7, q6, #7 - add r6, r0, r4 - vshr.u32 q4, q4, #32-7 - eor r11, r11, r7, ror #25 - vshr.u32 q6, q6, #32-7 - add r7, r1, r5 - veor.u32 q3, q3, q5 - strd r10, [sp, #14*4] - veor.u32 q11, q11, q7 - eor r12, r12, r6, ror #25 - veor.u32 q3, q3, q4 - eor lr, lr, r7, ror #25 - veor.u32 q11, q11, q6 - - ldrd r6, [sp, #10*4] - vadd.u32 q4, q3, q0 - add r2, r10, r2 
- vadd.u32 q6, q11, q8 - add r3, r11, r3 - vshl.u32 q5, q4, #9 - eor r6, r6, r2, ror #23 - vshl.u32 q7, q6, #9 - add r2, r12, r0 - vshr.u32 q4, q4, #32-9 - eor r7, r7, r3, ror #23 - vshr.u32 q6, q6, #32-9 - add r3, lr, r1 - veor.u32 q2, q2, q5 - strd r6, [sp, #10*4] - veor.u32 q10, q10, q7 - eor r8, r8, r2, ror #23 - veor.u32 q2, q2, q4 - eor r9, r9, r3, ror #23 - veor.u32 q10, q10, q6 - - ldrd r2, [sp, #6*4] - vadd.u32 q4, q2, q3 - add r10, r6, r10 - vadd.u32 q6, q10, q11 - add r11, r7, r11 - vext.u32 q3, q3, q3, #3 - eor r2, r2, r10, ror #19 - vshl.u32 q5, q4, #13 - add r10, r8, r12 - vext.u32 q11, q11, q11, #3 - eor r3, r3, r11, ror #19 - vshl.u32 q7, q6, #13 - add r11, r9, lr - vshr.u32 q4, q4, #32-13 - eor r4, r4, r10, ror #19 - vshr.u32 q6, q6, #32-13 - eor r5, r5, r11, ror #19 - veor.u32 q1, q1, q5 - veor.u32 q9, q9, q7 - veor.u32 q1, q1, q4 - veor.u32 q9, q9, q6 - - ldrd r10, [sp, #2*4] - vadd.u32 q4, q1, q2 - add r6, r2, r6 - vadd.u32 q6, q9, q10 - add r7, r3, r7 - vswp.u32 d4, d5 - eor r10, r10, r6, ror #14 - vshl.u32 q5, q4, #18 - add r6, r4, r8 - vswp.u32 d20, d21 - eor r11, r11, r7, ror #14 - vshl.u32 q7, q6, #18 - add r7, r5, r9 - vshr.u32 q4, q4, #32-18 - eor r0, r0, r6, ror #14 - vshr.u32 q6, q6, #32-18 - eor r1, r1, r7, ror #14 - veor.u32 q0, q0, q5 - ldrd r6, [sp, #14*4] - veor.u32 q8, q8, q7 - veor.u32 q0, q0, q4 - veor.u32 q8, q8, q6 - - - strd r2, [sp, #6*4] - vadd.u32 q4, q0, q3 - strd r10, [sp, #2*4] - vadd.u32 q6, q8, q11 - add r6, r11, r6 - vext.u32 q1, q1, q1, #1 - add r7, r0, r7 - vshl.u32 q5, q4, #7 - eor r4, r4, r6, ror #25 - vext.u32 q9, q9, q9, #1 - add r6, r1, r12 - vshl.u32 q7, q6, #7 - eor r5, r5, r7, ror #25 - vshr.u32 q4, q4, #32-7 - add r7, r10, lr - vshr.u32 q6, q6, #32-7 - eor r2, r2, r6, ror #25 - veor.u32 q1, q1, q5 - eor r3, r3, r7, ror #25 - veor.u32 q9, q9, q7 - strd r2, [sp, #6*4] - veor.u32 q1, q1, q4 - veor.u32 q9, q9, q6 - - add r10, r3, r10 - vadd.u32 q4, q1, q0 - ldrd r6, [sp, #10*4] - vadd.u32 q6, q9, q8 - add r11, 
r4, r11 - vshl.u32 q5, q4, #9 - eor r8, r8, r10, ror #23 - vshl.u32 q7, q6, #9 - add r10, r5, r0 - vshr.u32 q4, q4, #32-9 - eor r9, r9, r11, ror #23 - vshr.u32 q6, q6, #32-9 - add r11, r2, r1 - veor.u32 q2, q2, q5 - eor r6, r6, r10, ror #23 - veor.u32 q10, q10, q7 - eor r7, r7, r11, ror #23 - veor.u32 q2, q2, q4 - strd r6, [sp, #10*4] - veor.u32 q10, q10, q6 - - add r2, r7, r2 - vadd.u32 q4, q2, q1 - ldrd r10, [sp, #14*4] - vadd.u32 q6, q10, q9 - add r3, r8, r3 - vext.u32 q1, q1, q1, #3 - eor r12, r12, r2, ror #19 - vshl.u32 q5, q4, #13 - add r2, r9, r4 - vext.u32 q9, q9, q9, #3 - eor lr, lr, r3, ror #19 - vshl.u32 q7, q6, #13 - add r3, r6, r5 - vshr.u32 q4, q4, #32-13 - eor r10, r10, r2, ror #19 - vshr.u32 q6, q6, #32-13 - eor r11, r11, r3, ror #19 - veor.u32 q3, q3, q5 - veor.u32 q11, q11, q7 - veor.u32 q3, q3, q4 - veor.u32 q11, q11, q6 - - ldrd r2, [sp, #2*4] - vadd.u32 q4, q3, q2 - add r6, r11, r6 - vadd.u32 q6, q11, q10 - add r7, r12, r7 - vswp.u32 d4, d5 - eor r0, r0, r6, ror #14 - vshl.u32 q5, q4, #18 - add r6, lr, r8 - vswp.u32 d20, d21 - eor r1, r1, r7, ror #14 - vshl.u32 q7, q6, #18 - add r7, r10, r9 - vext.u32 q3, q3, q3, #1 - eor r2, r2, r6, ror #14 - vshr.u32 q4, q4, #32-18 - eor r3, r3, r7, ror #14 - vshr.u32 q6, q6, #32-18 - strd r2, [sp, #2*4] - vext.u32 q11, q11, q11, #1 - strd r10, [sp, #14*4] - veor.u32 q0, q0, q5 - veor.u32 q8, q8, q7 - veor.u32 q0, q0, q4 - veor.u32 q8, q8, q6 -.endm - -.macro salsa8_core_3way - ldmia sp, {r0-r12, lr} - ldrd r10, [sp, #14*4] - salsa8_core_3way_doubleround - salsa8_core_3way_doubleround - salsa8_core_3way_doubleround - salsa8_core_3way_doubleround - stmia sp, {r0-r5} - strd r8, [sp, #8*4] - str r12, [sp, #12*4] - str lr, [sp, #13*4] -.endm - - .text - .code 32 - .align 2 - .globl scrypt_core_3way - .globl _scrypt_core_3way -#ifdef __ELF__ - .type scrypt_core_3way, %function -#endif -scrypt_core_3way: -_scrypt_core_3way: - stmfd sp!, {r4-r11, lr} - vpush {q4-q7} - mov r12, sp - sub sp, sp, #24*16 - bic sp, sp, 
#63 - str r2, [sp, #4*16+3*4] - str r12, [sp, #4*16+4*4] - - mov r3, r0 - vldmia r3!, {q8-q15} - vmov.u64 q0, #0xffffffff - vmov.u32 q1, q8 - vmov.u32 q2, q12 - vbif.u32 q8, q9, q0 - vbif.u32 q12, q13, q0 - vbif.u32 q9, q10, q0 - vbif.u32 q13, q14, q0 - vbif.u32 q10, q11, q0 - vbif.u32 q14, q15, q0 - vbif.u32 q11, q1, q0 - vbif.u32 q15, q2, q0 - vldmia r3!, {q0-q7} - vswp.u32 d17, d21 - vswp.u32 d25, d29 - vswp.u32 d18, d22 - vswp.u32 d26, d30 - vstmia r0, {q8-q15} - vmov.u64 q8, #0xffffffff - vmov.u32 q9, q0 - vmov.u32 q10, q4 - vbif.u32 q0, q1, q8 - vbif.u32 q4, q5, q8 - vbif.u32 q1, q2, q8 - vbif.u32 q5, q6, q8 - vbif.u32 q2, q3, q8 - vbif.u32 q6, q7, q8 - vbif.u32 q3, q9, q8 - vbif.u32 q7, q10, q8 - vldmia r3, {q8-q15} - vswp.u32 d1, d5 - vswp.u32 d9, d13 - vswp.u32 d2, d6 - vswp.u32 d10, d14 - add r12, sp, #8*16 - vstmia r12!, {q0-q7} - vmov.u64 q0, #0xffffffff - vmov.u32 q1, q8 - vmov.u32 q2, q12 - vbif.u32 q8, q9, q0 - vbif.u32 q12, q13, q0 - vbif.u32 q9, q10, q0 - vbif.u32 q13, q14, q0 - vbif.u32 q10, q11, q0 - vbif.u32 q14, q15, q0 - vbif.u32 q11, q1, q0 - vbif.u32 q15, q2, q0 - vswp.u32 d17, d21 - vswp.u32 d25, d29 - vswp.u32 d18, d22 - vswp.u32 d26, d30 - vstmia r12, {q8-q15} - - add lr, sp, #128 - vldmia lr, {q0-q7} - add r2, r1, r2, lsl #7 - str r0, [sp, #4*16+0*4] - str r2, [sp, #4*16+2*4] -scrypt_core_3way_loop1: - add lr, r0, #16*4 - add r3, r1, #16*4 - str r1, [sp, #4*16+1*4] - mov r12, sp - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - ldr r2, [sp, #4*16+3*4] - scrypt_core_macro1a_x4 - sub r1, r1, #4*16 - - add r1, r1, r2, lsl #7 - vstmia r1, {q0-q7} - add r3, r1, r2, lsl #7 - vstmia r3, {q8-q15} - - add lr, sp, #128 - veor.u32 q0, q0, q4 - veor.u32 q1, q1, q5 - veor.u32 q2, q2, q6 - veor.u32 q3, q3, q7 - vstmia lr, {q0-q3} - veor.u32 q8, q8, q12 - veor.u32 q9, q9, q13 - veor.u32 q10, q10, q14 - veor.u32 q11, q11, q15 - add r12, sp, #256 - vstmia r12, {q8-q11} - - salsa8_core_3way - - ldr r0, [sp, #4*16+0*4] - mov r12, 
sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - add lr, sp, #128 - vldmia lr, {q4-q7} - vadd.u32 q4, q4, q0 - vadd.u32 q5, q5, q1 - vadd.u32 q6, q6, q2 - vadd.u32 q7, q7, q3 - add r12, sp, #256 - vldmia r12, {q0-q3} - vstmia lr, {q4-q7} - vadd.u32 q8, q8, q0 - vadd.u32 q9, q9, q1 - vadd.u32 q10, q10, q2 - vadd.u32 q11, q11, q3 - - add r4, sp, #128+4*16 - vldmia r4, {q0-q3} - vstmia r12, {q8-q11} - veor.u32 q0, q0, q4 - veor.u32 q1, q1, q5 - veor.u32 q2, q2, q6 - veor.u32 q3, q3, q7 - vstmia r4, {q0-q3} - veor.u32 q8, q8, q12 - veor.u32 q9, q9, q13 - veor.u32 q10, q10, q14 - veor.u32 q11, q11, q15 - vmov q12, q8 - vmov q13, q9 - vmov q14, q10 - vmov q15, q11 - - salsa8_core_3way - - ldr r0, [sp, #4*16+0*4] - mov r1, sp - add r0, r0, #16*4 - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - scrypt_core_macro3_x4 - sub r0, r0, #8*16 - - ldr r1, [sp, #4*16+1*4] - ldr r2, [sp, #4*16+2*4] - add lr, sp, #128 - add r4, sp, #128+4*16 - vldmia r4, {q4-q7} - vadd.u32 q4, q4, q0 - vadd.u32 q5, q5, q1 - vadd.u32 q6, q6, q2 - vadd.u32 q7, q7, q3 - vstmia r4, {q4-q7} - vldmia lr, {q0-q3} - vadd.u32 q12, q12, q8 - vadd.u32 q13, q13, q9 - vadd.u32 q14, q14, q10 - vadd.u32 q15, q15, q11 - add r12, sp, #256 - vldmia r12, {q8-q11} - - add r1, r1, #8*16 - cmp r1, r2 - bne scrypt_core_3way_loop1 - - ldr r2, [sp, #4*16+3*4] - add r5, sp, #256+4*16 - vstmia r5, {q12-q15} - - sub r1, r1, r2, lsl #7 - str r1, [sp, #4*16+1*4] -scrypt_core_3way_loop2: - str r2, [sp, #4*16+2*4] - - ldr r0, [sp, #4*16+0*4] - ldr r1, [sp, #4*16+1*4] - ldr r2, [sp, #4*16+3*4] - ldr r4, [r0, #16*4] - sub r2, r2, #1 - and r4, r4, r2 - add r1, r1, r4, lsl #7 - add r2, r0, #16*4 - add r3, r1, #16*4 - mov r12, sp - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - - ldr r1, [sp, #4*16+1*4] - ldr r2, [sp, #4*16+3*4] - add r1, r1, r2, lsl #7 - add r3, r1, r2, lsl #7 - sub r2, r2, #1 - vmov r6, r7, 
d8 - and r6, r6, r2 - add r6, r1, r6, lsl #7 - vmov r7, r8, d24 - add lr, sp, #128 - vldmia lr, {q0-q3} - pld [r6] - pld [r6, #8*4] - pld [r6, #16*4] - pld [r6, #24*4] - vldmia r6, {q8-q15} - and r7, r7, r2 - add r7, r3, r7, lsl #7 - veor.u32 q8, q8, q0 - veor.u32 q9, q9, q1 - veor.u32 q10, q10, q2 - veor.u32 q11, q11, q3 - pld [r7] - pld [r7, #8*4] - pld [r7, #16*4] - pld [r7, #24*4] - veor.u32 q12, q12, q4 - veor.u32 q13, q13, q5 - veor.u32 q14, q14, q6 - veor.u32 q15, q15, q7 - vldmia r7, {q0-q7} - vstmia lr, {q8-q15} - add r12, sp, #256 - vldmia r12, {q8-q15} - veor.u32 q8, q8, q0 - veor.u32 q9, q9, q1 - veor.u32 q10, q10, q2 - veor.u32 q11, q11, q3 - veor.u32 q12, q12, q4 - veor.u32 q13, q13, q5 - veor.u32 q14, q14, q6 - veor.u32 q15, q15, q7 - - vldmia lr, {q0-q7} - veor.u32 q0, q0, q4 - veor.u32 q1, q1, q5 - veor.u32 q2, q2, q6 - veor.u32 q3, q3, q7 - vstmia lr, {q0-q3} - veor.u32 q8, q8, q12 - veor.u32 q9, q9, q13 - veor.u32 q10, q10, q14 - veor.u32 q11, q11, q15 - vstmia r12, {q8-q15} - - salsa8_core_3way - - ldr r0, [sp, #4*16+0*4] - mov r12, sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - add lr, sp, #128 - vldmia lr, {q4-q7} - vadd.u32 q4, q4, q0 - vadd.u32 q5, q5, q1 - vadd.u32 q6, q6, q2 - vadd.u32 q7, q7, q3 - add r12, sp, #256 - vldmia r12, {q12-q15} - vstmia lr, {q4-q7} - vadd.u32 q12, q12, q8 - vadd.u32 q13, q13, q9 - vadd.u32 q14, q14, q10 - vadd.u32 q15, q15, q11 - - add r4, sp, #128+4*16 - vldmia r4, {q0-q3} - vstmia r12, {q12-q15} - veor.u32 q0, q0, q4 - veor.u32 q1, q1, q5 - veor.u32 q2, q2, q6 - veor.u32 q3, q3, q7 - add r5, sp, #256+4*16 - vldmia r5, {q8-q11} - vstmia r4, {q0-q3} - veor.u32 q8, q8, q12 - veor.u32 q9, q9, q13 - veor.u32 q10, q10, q14 - veor.u32 q11, q11, q15 - vmov q12, q8 - vmov q13, q9 - vmov q14, q10 - vmov q15, q11 - - salsa8_core_3way - - ldr r0, [sp, #4*16+0*4] - ldr r3, [sp, #4*16+1*4] - ldr r2, [sp, #4*16+3*4] - mov r1, sp - add r0, r0, #16*4 
- sub r2, r2, #1 - scrypt_core_macro3_x4 - and r4, r4, r2 - add r3, r3, r4, lsl #7 - pld [r3, #16*4] - pld [r3] - pld [r3, #24*4] - pld [r3, #8*4] - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - - add lr, sp, #128 - add r4, sp, #128+4*16 - vldmia r4, {q4-q7} - vadd.u32 q4, q4, q0 - vadd.u32 q5, q5, q1 - vadd.u32 q6, q6, q2 - vadd.u32 q7, q7, q3 - vstmia r4, {q4-q7} - vadd.u32 q12, q12, q8 - vadd.u32 q13, q13, q9 - vadd.u32 q14, q14, q10 - vadd.u32 q15, q15, q11 - add r5, sp, #256+4*16 - vstmia r5, {q12-q15} - - ldr r2, [sp, #4*16+2*4] - subs r2, r2, #1 - bne scrypt_core_3way_loop2 - - ldr r0, [sp, #4*16+0*4] - vldmia r0, {q8-q15} - vmov.u64 q0, #0xffffffff - vmov.u32 q1, q8 - vmov.u32 q2, q12 - vbif.u32 q8, q9, q0 - vbif.u32 q12, q13, q0 - vbif.u32 q9, q10, q0 - vbif.u32 q13, q14, q0 - vbif.u32 q10, q11, q0 - vbif.u32 q14, q15, q0 - vbif.u32 q11, q1, q0 - vbif.u32 q15, q2, q0 - add r12, sp, #8*16 - vldmia r12!, {q0-q7} - vswp.u32 d17, d21 - vswp.u32 d25, d29 - vswp.u32 d18, d22 - vswp.u32 d26, d30 - vstmia r0!, {q8-q15} - vmov.u64 q8, #0xffffffff - vmov.u32 q9, q0 - vmov.u32 q10, q4 - vbif.u32 q0, q1, q8 - vbif.u32 q4, q5, q8 - vbif.u32 q1, q2, q8 - vbif.u32 q5, q6, q8 - vbif.u32 q2, q3, q8 - vbif.u32 q6, q7, q8 - vbif.u32 q3, q9, q8 - vbif.u32 q7, q10, q8 - vldmia r12, {q8-q15} - vswp.u32 d1, d5 - vswp.u32 d9, d13 - vswp.u32 d2, d6 - vswp.u32 d10, d14 - vstmia r0!, {q0-q7} - vmov.u64 q0, #0xffffffff - vmov.u32 q1, q8 - vmov.u32 q2, q12 - vbif.u32 q8, q9, q0 - vbif.u32 q12, q13, q0 - vbif.u32 q9, q10, q0 - vbif.u32 q13, q14, q0 - vbif.u32 q10, q11, q0 - vbif.u32 q14, q15, q0 - vbif.u32 q11, q1, q0 - vbif.u32 q15, q2, q0 - vswp.u32 d17, d21 - vswp.u32 d25, d29 - vswp.u32 d18, d22 - vswp.u32 d26, d30 - vstmia r0, {q8-q15} - - ldr sp, [sp, #4*16+4*4] - vpop {q4-q7} - ldmfd sp!, {r4-r11, pc} - -#endif /* __ARM_NEON__ */ - -#endif diff --git a/asm/scrypt-x64.S b/asm/scrypt-x64.S deleted file mode 100644 index b45a2879..00000000 --- a/asm/scrypt-x64.S +++ /dev/null @@ 
-1,2907 +0,0 @@ -/* - * Copyright 2011-2014 pooler@litecoinpool.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(USE_ASM) && defined(__x86_64__) - - .text - .p2align 6 - .globl scrypt_best_throughput - .globl _scrypt_best_throughput -scrypt_best_throughput: -_scrypt_best_throughput: - pushq %rbx -#if defined(USE_AVX2) - /* Check for AVX and OSXSAVE support */ - movl $1, %eax - cpuid - andl $0x18000000, %ecx - cmpl $0x18000000, %ecx - jne scrypt_best_throughput_no_avx2 - /* Check for AVX2 support */ - movl $7, %eax - xorl %ecx, %ecx - cpuid - andl $0x00000020, %ebx - cmpl $0x00000020, %ebx - jne scrypt_best_throughput_no_avx2 - /* Check for XMM and YMM state support */ - xorl %ecx, %ecx - xgetbv - andl $0x00000006, %eax - cmpl $0x00000006, %eax - jne scrypt_best_throughput_no_avx2 - movl $6, %eax - jmp scrypt_best_throughput_exit -scrypt_best_throughput_no_avx2: -#endif - /* Check for AuthenticAMD */ - xorq %rax, %rax - cpuid - movl $3, %eax - cmpl $0x444d4163, %ecx - jne scrypt_best_throughput_not_amd - cmpl $0x69746e65, %edx - jne scrypt_best_throughput_not_amd - cmpl $0x68747541, %ebx - jne scrypt_best_throughput_not_amd - /* Check for AMD K8 or Bobcat */ - movl $1, %eax - cpuid - andl $0x0ff00000, %eax - jz scrypt_best_throughput_one - cmpl $0x00500000, %eax - je scrypt_best_throughput_one - movl $3, %eax - jmp scrypt_best_throughput_exit -scrypt_best_throughput_not_amd: - /* Check for GenuineIntel */ - cmpl $0x6c65746e, %ecx - jne scrypt_best_throughput_exit - cmpl $0x49656e69, %edx - jne scrypt_best_throughput_exit - cmpl $0x756e6547, %ebx - jne scrypt_best_throughput_exit - /* Check for Intel Atom */ - movl $1, %eax - cpuid - movl %eax, %edx - andl $0x0ff00f00, %eax - cmpl $0x00000600, %eax - movl $3, %eax - jnz scrypt_best_throughput_exit - andl $0x000f00f0, %edx - cmpl $0x000100c0, %edx - je scrypt_best_throughput_one - cmpl $0x00020060, %edx - je scrypt_best_throughput_one - cmpl $0x00030060, %edx - jne scrypt_best_throughput_exit 
-scrypt_best_throughput_one: - movl $1, %eax -scrypt_best_throughput_exit: - popq %rbx - ret - - -.macro scrypt_shuffle src, so, dest, do - movl \so+60(\src), %eax - movl \so+44(\src), %ebx - movl \so+28(\src), %ecx - movl \so+12(\src), %edx - movl %eax, \do+12(\dest) - movl %ebx, \do+28(\dest) - movl %ecx, \do+44(\dest) - movl %edx, \do+60(\dest) - movl \so+40(\src), %eax - movl \so+8(\src), %ebx - movl \so+48(\src), %ecx - movl \so+16(\src), %edx - movl %eax, \do+8(\dest) - movl %ebx, \do+40(\dest) - movl %ecx, \do+16(\dest) - movl %edx, \do+48(\dest) - movl \so+20(\src), %eax - movl \so+4(\src), %ebx - movl \so+52(\src), %ecx - movl \so+36(\src), %edx - movl %eax, \do+4(\dest) - movl %ebx, \do+20(\dest) - movl %ecx, \do+36(\dest) - movl %edx, \do+52(\dest) - movl \so+0(\src), %eax - movl \so+24(\src), %ebx - movl \so+32(\src), %ecx - movl \so+56(\src), %edx - movl %eax, \do+0(\dest) - movl %ebx, \do+24(\dest) - movl %ecx, \do+32(\dest) - movl %edx, \do+56(\dest) -.endm - - -.macro salsa8_core_gen_doubleround - movq 72(%rsp), %r15 - - leaq (%r14, %rdx), %rbp - roll $7, %ebp - xorl %ebp, %r9d - leaq (%rdi, %r15), %rbp - roll $7, %ebp - xorl %ebp, %r10d - leaq (%rdx, %r9), %rbp - roll $9, %ebp - xorl %ebp, %r11d - leaq (%r15, %r10), %rbp - roll $9, %ebp - xorl %ebp, %r13d - - leaq (%r9, %r11), %rbp - roll $13, %ebp - xorl %ebp, %r14d - leaq (%r10, %r13), %rbp - roll $13, %ebp - xorl %ebp, %edi - leaq (%r11, %r14), %rbp - roll $18, %ebp - xorl %ebp, %edx - leaq (%r13, %rdi), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq 48(%rsp), %rbp - movq %r15, 72(%rsp) - - leaq (%rax, %rbp), %r15 - roll $7, %r15d - xorl %r15d, %ebx - leaq (%rbp, %rbx), %r15 - roll $9, %r15d - xorl %r15d, %ecx - leaq (%rbx, %rcx), %r15 - roll $13, %r15d - xorl %r15d, %eax - leaq (%rcx, %rax), %r15 - roll $18, %r15d - xorl %r15d, %ebp - - movq 88(%rsp), %r15 - movq %rbp, 48(%rsp) - - leaq (%r12, %r15), %rbp - roll $7, %ebp - xorl %ebp, %esi - leaq (%r15, %rsi), %rbp - roll $9, %ebp - xorl 
%ebp, %r8d - leaq (%rsi, %r8), %rbp - roll $13, %ebp - xorl %ebp, %r12d - leaq (%r8, %r12), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq %r15, 88(%rsp) - movq 72(%rsp), %r15 - - leaq (%rsi, %rdx), %rbp - roll $7, %ebp - xorl %ebp, %edi - leaq (%r9, %r15), %rbp - roll $7, %ebp - xorl %ebp, %eax - leaq (%rdx, %rdi), %rbp - roll $9, %ebp - xorl %ebp, %ecx - leaq (%r15, %rax), %rbp - roll $9, %ebp - xorl %ebp, %r8d - - leaq (%rdi, %rcx), %rbp - roll $13, %ebp - xorl %ebp, %esi - leaq (%rax, %r8), %rbp - roll $13, %ebp - xorl %ebp, %r9d - leaq (%rcx, %rsi), %rbp - roll $18, %ebp - xorl %ebp, %edx - leaq (%r8, %r9), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq 48(%rsp), %rbp - movq %r15, 72(%rsp) - - leaq (%r10, %rbp), %r15 - roll $7, %r15d - xorl %r15d, %r12d - leaq (%rbp, %r12), %r15 - roll $9, %r15d - xorl %r15d, %r11d - leaq (%r12, %r11), %r15 - roll $13, %r15d - xorl %r15d, %r10d - leaq (%r11, %r10), %r15 - roll $18, %r15d - xorl %r15d, %ebp - - movq 88(%rsp), %r15 - movq %rbp, 48(%rsp) - - leaq (%rbx, %r15), %rbp - roll $7, %ebp - xorl %ebp, %r14d - leaq (%r15, %r14), %rbp - roll $9, %ebp - xorl %ebp, %r13d - leaq (%r14, %r13), %rbp - roll $13, %ebp - xorl %ebp, %ebx - leaq (%r13, %rbx), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq %r15, 88(%rsp) -.endm - - .text - .p2align 6 -salsa8_core_gen: - /* 0: %rdx, %rdi, %rcx, %rsi */ - movq 8(%rsp), %rdi - movq %rdi, %rdx - shrq $32, %rdi - movq 16(%rsp), %rsi - movq %rsi, %rcx - shrq $32, %rsi - /* 1: %r9, 72(%rsp), %rax, %r8 */ - movq 24(%rsp), %r8 - movq %r8, %r9 - shrq $32, %r8 - movq %r8, 72(%rsp) - movq 32(%rsp), %r8 - movq %r8, %rax - shrq $32, %r8 - /* 2: %r11, %r10, 48(%rsp), %r12 */ - movq 40(%rsp), %r10 - movq %r10, %r11 - shrq $32, %r10 - movq 48(%rsp), %r12 - /* movq %r12, %r13 */ - /* movq %r13, 48(%rsp) */ - shrq $32, %r12 - /* 3: %r14, %r13, %rbx, 88(%rsp) */ - movq 56(%rsp), %r13 - movq %r13, %r14 - shrq $32, %r13 - movq 64(%rsp), %r15 - movq %r15, %rbx - shrq $32, %r15 - movq %r15, 
88(%rsp) - - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - - shlq $32, %rdi - xorq %rdi, %rdx - movq %rdx, 24(%rsp) - - shlq $32, %rsi - xorq %rsi, %rcx - movq %rcx, 32(%rsp) - - movl 72(%rsp), %edi - shlq $32, %rdi - xorq %rdi, %r9 - movq %r9, 40(%rsp) - - movl 48(%rsp), %ebp - shlq $32, %r8 - xorq %r8, %rax - movq %rax, 48(%rsp) - - shlq $32, %r10 - xorq %r10, %r11 - movq %r11, 56(%rsp) - - shlq $32, %r12 - xorq %r12, %rbp - movq %rbp, 64(%rsp) - - shlq $32, %r13 - xorq %r13, %r14 - movq %r14, 72(%rsp) - - movdqa 24(%rsp), %xmm0 - - shlq $32, %r15 - xorq %r15, %rbx - movq %rbx, 80(%rsp) - - movdqa 40(%rsp), %xmm1 - movdqa 56(%rsp), %xmm2 - movdqa 72(%rsp), %xmm3 - - ret - - - .text - .p2align 6 - .globl scrypt_core - .globl _scrypt_core -scrypt_core: -_scrypt_core: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 -#if defined(_WIN64) || defined(__CYGWIN__) - subq $176, %rsp - movdqa %xmm6, 8(%rsp) - movdqa %xmm7, 24(%rsp) - movdqa %xmm8, 40(%rsp) - movdqa %xmm9, 56(%rsp) - movdqa %xmm10, 72(%rsp) - movdqa %xmm11, 88(%rsp) - movdqa %xmm12, 104(%rsp) - movdqa %xmm13, 120(%rsp) - movdqa %xmm14, 136(%rsp) - movdqa %xmm15, 152(%rsp) - pushq %rdi - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi -#else - movq %rdx, %r8 -#endif - -.macro scrypt_core_cleanup -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - popq %rdi - movdqa 8(%rsp), %xmm6 - movdqa 24(%rsp), %xmm7 - movdqa 40(%rsp), %xmm8 - movdqa 56(%rsp), %xmm9 - movdqa 72(%rsp), %xmm10 - movdqa 88(%rsp), %xmm11 - movdqa 104(%rsp), %xmm12 - movdqa 120(%rsp), %xmm13 - movdqa 136(%rsp), %xmm14 - movdqa 152(%rsp), %xmm15 - addq $176, %rsp -#endif - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx -.endm - - /* GenuineIntel processors have fast SIMD */ - xorl %eax, %eax - cpuid - cmpl $0x6c65746e, %ecx - jne scrypt_core_gen - cmpl $0x49656e69, %edx - jne scrypt_core_gen - cmpl $0x756e6547, 
%ebx - je scrypt_core_xmm - - .p2align 6 -scrypt_core_gen: - subq $136, %rsp - movdqa 0(%rdi), %xmm8 - movdqa 16(%rdi), %xmm9 - movdqa 32(%rdi), %xmm10 - movdqa 48(%rdi), %xmm11 - movdqa 64(%rdi), %xmm12 - movdqa 80(%rdi), %xmm13 - movdqa 96(%rdi), %xmm14 - movdqa 112(%rdi), %xmm15 - - movq %r8, %rcx - shlq $7, %rcx - addq %rsi, %rcx - movq %r8, 96(%rsp) - movq %rdi, 104(%rsp) - movq %rsi, 112(%rsp) - movq %rcx, 120(%rsp) -scrypt_core_gen_loop1: - movdqa %xmm8, 0(%rsi) - movdqa %xmm9, 16(%rsi) - movdqa %xmm10, 32(%rsi) - movdqa %xmm11, 48(%rsi) - movdqa %xmm12, 64(%rsi) - movdqa %xmm13, 80(%rsi) - movdqa %xmm14, 96(%rsi) - movdqa %xmm15, 112(%rsi) - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rsp) - movdqa %xmm9, 16(%rsp) - movdqa %xmm10, 32(%rsp) - movdqa %xmm11, 48(%rsp) - movq %rsi, 128(%rsp) - call salsa8_core_gen - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, 0(%rsp) - movdqa %xmm13, 16(%rsp) - movdqa %xmm14, 32(%rsp) - movdqa %xmm15, 48(%rsp) - call salsa8_core_gen - movq 128(%rsp), %rsi - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - addq $128, %rsi - movq 120(%rsp), %rcx - cmpq %rcx, %rsi - jne scrypt_core_gen_loop1 - - movq 96(%rsp), %r8 - movq %r8, %rcx - subl $1, %r8d - movq %r8, 96(%rsp) - movd %xmm12, %edx -scrypt_core_gen_loop2: - movq 112(%rsp), %rsi - andl %r8d, %edx - shll $7, %edx - addq %rsi, %rdx - movdqa 0(%rdx), %xmm0 - movdqa 16(%rdx), %xmm1 - movdqa 32(%rdx), %xmm2 - movdqa 48(%rdx), %xmm3 - movdqa 64(%rdx), %xmm4 - movdqa 80(%rdx), %xmm5 - movdqa 96(%rdx), %xmm6 - movdqa 112(%rdx), %xmm7 - pxor %xmm0, %xmm8 - pxor %xmm1, %xmm9 - pxor %xmm2, %xmm10 - pxor %xmm3, %xmm11 - pxor %xmm4, %xmm12 - pxor %xmm5, %xmm13 - pxor %xmm6, %xmm14 - pxor %xmm7, %xmm15 - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - 
pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rsp) - movdqa %xmm9, 16(%rsp) - movdqa %xmm10, 32(%rsp) - movdqa %xmm11, 48(%rsp) - movq %rcx, 128(%rsp) - call salsa8_core_gen - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, 0(%rsp) - movdqa %xmm13, 16(%rsp) - movdqa %xmm14, 32(%rsp) - movdqa %xmm15, 48(%rsp) - call salsa8_core_gen - movq 96(%rsp), %r8 - movq 128(%rsp), %rcx - addl 0(%rsp), %edx - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - subq $1, %rcx - ja scrypt_core_gen_loop2 - - movq 104(%rsp), %rdi - movdqa %xmm8, 0(%rdi) - movdqa %xmm9, 16(%rdi) - movdqa %xmm10, 32(%rdi) - movdqa %xmm11, 48(%rdi) - movdqa %xmm12, 64(%rdi) - movdqa %xmm13, 80(%rdi) - movdqa %xmm14, 96(%rdi) - movdqa %xmm15, 112(%rdi) - - addq $136, %rsp - scrypt_core_cleanup - ret - - -.macro salsa8_core_xmm_doubleround - movdqa %xmm1, %xmm4 - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm3 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm3, %xmm3 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm1 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm0 - pshufd $0x39, %xmm1, %xmm1 - - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm1 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm1, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, 
%xmm1, %xmm1 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm3 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - pshufd $0x39, %xmm3, %xmm3 - pxor %xmm5, %xmm0 -.endm - -.macro salsa8_core_xmm - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround -.endm - - .p2align 6 -scrypt_core_xmm: - pcmpeqw %xmm1, %xmm1 - psrlq $32, %xmm1 - - movdqa 0(%rdi), %xmm8 - movdqa 16(%rdi), %xmm11 - movdqa 32(%rdi), %xmm10 - movdqa 48(%rdi), %xmm9 - movdqa %xmm8, %xmm0 - pxor %xmm11, %xmm8 - pand %xmm1, %xmm8 - pxor %xmm11, %xmm8 - pxor %xmm10, %xmm11 - pand %xmm1, %xmm11 - pxor %xmm10, %xmm11 - pxor %xmm9, %xmm10 - pand %xmm1, %xmm10 - pxor %xmm9, %xmm10 - pxor %xmm0, %xmm9 - pand %xmm1, %xmm9 - pxor %xmm0, %xmm9 - movdqa %xmm8, %xmm0 - pshufd $0x4e, %xmm10, %xmm10 - punpcklqdq %xmm10, %xmm8 - punpckhqdq %xmm0, %xmm10 - movdqa %xmm11, %xmm0 - pshufd $0x4e, %xmm9, %xmm9 - punpcklqdq %xmm9, %xmm11 - punpckhqdq %xmm0, %xmm9 - - movdqa 64(%rdi), %xmm12 - movdqa 80(%rdi), %xmm15 - movdqa 96(%rdi), %xmm14 - movdqa 112(%rdi), %xmm13 - movdqa %xmm12, %xmm0 - pxor %xmm15, %xmm12 - pand %xmm1, %xmm12 - pxor %xmm15, %xmm12 - pxor %xmm14, %xmm15 - pand %xmm1, %xmm15 - pxor %xmm14, %xmm15 - pxor %xmm13, %xmm14 - pand %xmm1, %xmm14 - pxor %xmm13, %xmm14 - pxor %xmm0, %xmm13 - pand %xmm1, %xmm13 - pxor %xmm0, %xmm13 - movdqa %xmm12, %xmm0 - pshufd $0x4e, %xmm14, %xmm14 - punpcklqdq %xmm14, %xmm12 - punpckhqdq %xmm0, %xmm14 - movdqa %xmm15, %xmm0 - pshufd $0x4e, %xmm13, %xmm13 - punpcklqdq %xmm13, %xmm15 - punpckhqdq %xmm0, %xmm13 - - movq %rsi, %rdx - movq %r8, %rcx - shlq $7, %rcx - addq %rsi, %rcx -scrypt_core_xmm_loop1: - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rdx) - movdqa %xmm9, 16(%rdx) - 
movdqa %xmm10, 32(%rdx) - movdqa %xmm11, 48(%rdx) - movdqa %xmm12, 64(%rdx) - movdqa %xmm13, 80(%rdx) - movdqa %xmm14, 96(%rdx) - movdqa %xmm15, 112(%rdx) - - movdqa %xmm8, %xmm0 - movdqa %xmm9, %xmm1 - movdqa %xmm10, %xmm2 - movdqa %xmm11, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, %xmm0 - movdqa %xmm13, %xmm1 - movdqa %xmm14, %xmm2 - movdqa %xmm15, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - addq $128, %rdx - cmpq %rcx, %rdx - jne scrypt_core_xmm_loop1 - - movq %r8, %rcx - subl $1, %r8d -scrypt_core_xmm_loop2: - movd %xmm12, %edx - andl %r8d, %edx - shll $7, %edx - pxor 0(%rsi, %rdx), %xmm8 - pxor 16(%rsi, %rdx), %xmm9 - pxor 32(%rsi, %rdx), %xmm10 - pxor 48(%rsi, %rdx), %xmm11 - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, %xmm0 - movdqa %xmm9, %xmm1 - movdqa %xmm10, %xmm2 - movdqa %xmm11, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor 64(%rsi, %rdx), %xmm12 - pxor 80(%rsi, %rdx), %xmm13 - pxor 96(%rsi, %rdx), %xmm14 - pxor 112(%rsi, %rdx), %xmm15 - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, %xmm0 - movdqa %xmm13, %xmm1 - movdqa %xmm14, %xmm2 - movdqa %xmm15, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - subq $1, %rcx - ja scrypt_core_xmm_loop2 - - pcmpeqw %xmm1, %xmm1 - psrlq $32, %xmm1 - - movdqa %xmm8, %xmm0 - pxor %xmm9, %xmm8 - pand %xmm1, %xmm8 - pxor %xmm9, %xmm8 - pxor %xmm10, %xmm9 - pand %xmm1, %xmm9 - pxor %xmm10, %xmm9 - pxor %xmm11, %xmm10 - pand %xmm1, %xmm10 - pxor %xmm11, %xmm10 - pxor %xmm0, %xmm11 - pand %xmm1, %xmm11 - pxor %xmm0, %xmm11 - movdqa 
%xmm8, %xmm0 - pshufd $0x4e, %xmm10, %xmm10 - punpcklqdq %xmm10, %xmm8 - punpckhqdq %xmm0, %xmm10 - movdqa %xmm9, %xmm0 - pshufd $0x4e, %xmm11, %xmm11 - punpcklqdq %xmm11, %xmm9 - punpckhqdq %xmm0, %xmm11 - movdqa %xmm8, 0(%rdi) - movdqa %xmm11, 16(%rdi) - movdqa %xmm10, 32(%rdi) - movdqa %xmm9, 48(%rdi) - - movdqa %xmm12, %xmm0 - pxor %xmm13, %xmm12 - pand %xmm1, %xmm12 - pxor %xmm13, %xmm12 - pxor %xmm14, %xmm13 - pand %xmm1, %xmm13 - pxor %xmm14, %xmm13 - pxor %xmm15, %xmm14 - pand %xmm1, %xmm14 - pxor %xmm15, %xmm14 - pxor %xmm0, %xmm15 - pand %xmm1, %xmm15 - pxor %xmm0, %xmm15 - movdqa %xmm12, %xmm0 - pshufd $0x4e, %xmm14, %xmm14 - punpcklqdq %xmm14, %xmm12 - punpckhqdq %xmm0, %xmm14 - movdqa %xmm13, %xmm0 - pshufd $0x4e, %xmm15, %xmm15 - punpcklqdq %xmm15, %xmm13 - punpckhqdq %xmm0, %xmm15 - movdqa %xmm12, 64(%rdi) - movdqa %xmm15, 80(%rdi) - movdqa %xmm14, 96(%rdi) - movdqa %xmm13, 112(%rdi) - - scrypt_core_cleanup - ret - - -#if defined(USE_AVX) -.macro salsa8_core_3way_avx_doubleround - vpaddd %xmm0, %xmm1, %xmm4 - vpaddd %xmm8, %xmm9, %xmm6 - vpaddd %xmm12, %xmm13, %xmm7 - vpslld $7, %xmm4, %xmm5 - vpsrld $25, %xmm4, %xmm4 - vpxor %xmm5, %xmm3, %xmm3 - vpxor %xmm4, %xmm3, %xmm3 - vpslld $7, %xmm6, %xmm5 - vpsrld $25, %xmm6, %xmm6 - vpxor %xmm5, %xmm11, %xmm11 - vpxor %xmm6, %xmm11, %xmm11 - vpslld $7, %xmm7, %xmm5 - vpsrld $25, %xmm7, %xmm7 - vpxor %xmm5, %xmm15, %xmm15 - vpxor %xmm7, %xmm15, %xmm15 - - vpaddd %xmm3, %xmm0, %xmm4 - vpaddd %xmm11, %xmm8, %xmm6 - vpaddd %xmm15, %xmm12, %xmm7 - vpslld $9, %xmm4, %xmm5 - vpsrld $23, %xmm4, %xmm4 - vpxor %xmm5, %xmm2, %xmm2 - vpxor %xmm4, %xmm2, %xmm2 - vpslld $9, %xmm6, %xmm5 - vpsrld $23, %xmm6, %xmm6 - vpxor %xmm5, %xmm10, %xmm10 - vpxor %xmm6, %xmm10, %xmm10 - vpslld $9, %xmm7, %xmm5 - vpsrld $23, %xmm7, %xmm7 - vpxor %xmm5, %xmm14, %xmm14 - vpxor %xmm7, %xmm14, %xmm14 - - vpaddd %xmm2, %xmm3, %xmm4 - vpaddd %xmm10, %xmm11, %xmm6 - vpaddd %xmm14, %xmm15, %xmm7 - vpslld $13, %xmm4, %xmm5 - vpsrld $19, 
%xmm4, %xmm4 - vpshufd $0x93, %xmm3, %xmm3 - vpshufd $0x93, %xmm11, %xmm11 - vpshufd $0x93, %xmm15, %xmm15 - vpxor %xmm5, %xmm1, %xmm1 - vpxor %xmm4, %xmm1, %xmm1 - vpslld $13, %xmm6, %xmm5 - vpsrld $19, %xmm6, %xmm6 - vpxor %xmm5, %xmm9, %xmm9 - vpxor %xmm6, %xmm9, %xmm9 - vpslld $13, %xmm7, %xmm5 - vpsrld $19, %xmm7, %xmm7 - vpxor %xmm5, %xmm13, %xmm13 - vpxor %xmm7, %xmm13, %xmm13 - - vpaddd %xmm1, %xmm2, %xmm4 - vpaddd %xmm9, %xmm10, %xmm6 - vpaddd %xmm13, %xmm14, %xmm7 - vpslld $18, %xmm4, %xmm5 - vpsrld $14, %xmm4, %xmm4 - vpshufd $0x4e, %xmm2, %xmm2 - vpshufd $0x4e, %xmm10, %xmm10 - vpshufd $0x4e, %xmm14, %xmm14 - vpxor %xmm5, %xmm0, %xmm0 - vpxor %xmm4, %xmm0, %xmm0 - vpslld $18, %xmm6, %xmm5 - vpsrld $14, %xmm6, %xmm6 - vpxor %xmm5, %xmm8, %xmm8 - vpxor %xmm6, %xmm8, %xmm8 - vpslld $18, %xmm7, %xmm5 - vpsrld $14, %xmm7, %xmm7 - vpxor %xmm5, %xmm12, %xmm12 - vpxor %xmm7, %xmm12, %xmm12 - - vpaddd %xmm0, %xmm3, %xmm4 - vpaddd %xmm8, %xmm11, %xmm6 - vpaddd %xmm12, %xmm15, %xmm7 - vpslld $7, %xmm4, %xmm5 - vpsrld $25, %xmm4, %xmm4 - vpshufd $0x39, %xmm1, %xmm1 - vpxor %xmm5, %xmm1, %xmm1 - vpxor %xmm4, %xmm1, %xmm1 - vpslld $7, %xmm6, %xmm5 - vpsrld $25, %xmm6, %xmm6 - vpshufd $0x39, %xmm9, %xmm9 - vpxor %xmm5, %xmm9, %xmm9 - vpxor %xmm6, %xmm9, %xmm9 - vpslld $7, %xmm7, %xmm5 - vpsrld $25, %xmm7, %xmm7 - vpshufd $0x39, %xmm13, %xmm13 - vpxor %xmm5, %xmm13, %xmm13 - vpxor %xmm7, %xmm13, %xmm13 - - vpaddd %xmm1, %xmm0, %xmm4 - vpaddd %xmm9, %xmm8, %xmm6 - vpaddd %xmm13, %xmm12, %xmm7 - vpslld $9, %xmm4, %xmm5 - vpsrld $23, %xmm4, %xmm4 - vpxor %xmm5, %xmm2, %xmm2 - vpxor %xmm4, %xmm2, %xmm2 - vpslld $9, %xmm6, %xmm5 - vpsrld $23, %xmm6, %xmm6 - vpxor %xmm5, %xmm10, %xmm10 - vpxor %xmm6, %xmm10, %xmm10 - vpslld $9, %xmm7, %xmm5 - vpsrld $23, %xmm7, %xmm7 - vpxor %xmm5, %xmm14, %xmm14 - vpxor %xmm7, %xmm14, %xmm14 - - vpaddd %xmm2, %xmm1, %xmm4 - vpaddd %xmm10, %xmm9, %xmm6 - vpaddd %xmm14, %xmm13, %xmm7 - vpslld $13, %xmm4, %xmm5 - vpsrld $19, %xmm4, %xmm4 - 
vpshufd $0x93, %xmm1, %xmm1 - vpshufd $0x93, %xmm9, %xmm9 - vpshufd $0x93, %xmm13, %xmm13 - vpxor %xmm5, %xmm3, %xmm3 - vpxor %xmm4, %xmm3, %xmm3 - vpslld $13, %xmm6, %xmm5 - vpsrld $19, %xmm6, %xmm6 - vpxor %xmm5, %xmm11, %xmm11 - vpxor %xmm6, %xmm11, %xmm11 - vpslld $13, %xmm7, %xmm5 - vpsrld $19, %xmm7, %xmm7 - vpxor %xmm5, %xmm15, %xmm15 - vpxor %xmm7, %xmm15, %xmm15 - - vpaddd %xmm3, %xmm2, %xmm4 - vpaddd %xmm11, %xmm10, %xmm6 - vpaddd %xmm15, %xmm14, %xmm7 - vpslld $18, %xmm4, %xmm5 - vpsrld $14, %xmm4, %xmm4 - vpshufd $0x4e, %xmm2, %xmm2 - vpshufd $0x4e, %xmm10, %xmm10 - vpxor %xmm5, %xmm0, %xmm0 - vpxor %xmm4, %xmm0, %xmm0 - vpslld $18, %xmm6, %xmm5 - vpsrld $14, %xmm6, %xmm6 - vpshufd $0x4e, %xmm14, %xmm14 - vpshufd $0x39, %xmm11, %xmm11 - vpxor %xmm5, %xmm8, %xmm8 - vpxor %xmm6, %xmm8, %xmm8 - vpslld $18, %xmm7, %xmm5 - vpsrld $14, %xmm7, %xmm7 - vpshufd $0x39, %xmm3, %xmm3 - vpshufd $0x39, %xmm15, %xmm15 - vpxor %xmm5, %xmm12, %xmm12 - vpxor %xmm7, %xmm12, %xmm12 -.endm - -.macro salsa8_core_3way_avx - salsa8_core_3way_avx_doubleround - salsa8_core_3way_avx_doubleround - salsa8_core_3way_avx_doubleround - salsa8_core_3way_avx_doubleround -.endm -#endif /* USE_AVX */ - - .text - .p2align 6 - .globl scrypt_core_3way - .globl _scrypt_core_3way -scrypt_core_3way: -_scrypt_core_3way: - pushq %rbx - pushq %rbp -#if defined(_WIN64) || defined(__CYGWIN__) - subq $176, %rsp - movdqa %xmm6, 8(%rsp) - movdqa %xmm7, 24(%rsp) - movdqa %xmm8, 40(%rsp) - movdqa %xmm9, 56(%rsp) - movdqa %xmm10, 72(%rsp) - movdqa %xmm11, 88(%rsp) - movdqa %xmm12, 104(%rsp) - movdqa %xmm13, 120(%rsp) - movdqa %xmm14, 136(%rsp) - movdqa %xmm15, 152(%rsp) - pushq %rdi - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi -#else - movq %rdx, %r8 -#endif - subq $392, %rsp - -.macro scrypt_core_3way_cleanup - addq $392, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - popq %rdi - movdqa 8(%rsp), %xmm6 - movdqa 24(%rsp), %xmm7 - movdqa 40(%rsp), %xmm8 - movdqa 56(%rsp), %xmm9 - movdqa 
72(%rsp), %xmm10 - movdqa 88(%rsp), %xmm11 - movdqa 104(%rsp), %xmm12 - movdqa 120(%rsp), %xmm13 - movdqa 136(%rsp), %xmm14 - movdqa 152(%rsp), %xmm15 - addq $176, %rsp -#endif - popq %rbp - popq %rbx -.endm - -#if !defined(USE_AVX) - jmp scrypt_core_3way_xmm -#else - /* Check for AVX and OSXSAVE support */ - movl $1, %eax - cpuid - andl $0x18000000, %ecx - cmpl $0x18000000, %ecx - jne scrypt_core_3way_xmm - /* Check for XMM and YMM state support */ - xorl %ecx, %ecx - xgetbv - andl $0x00000006, %eax - cmpl $0x00000006, %eax - jne scrypt_core_3way_xmm -#if defined(USE_XOP) - /* Check for XOP support */ - movl $0x80000001, %eax - cpuid - andl $0x00000800, %ecx - jnz scrypt_core_3way_xop -#endif - -scrypt_core_3way_avx: - scrypt_shuffle %rdi, 0, %rsp, 0 - scrypt_shuffle %rdi, 64, %rsp, 64 - scrypt_shuffle %rdi, 128, %rsp, 128 - scrypt_shuffle %rdi, 192, %rsp, 192 - scrypt_shuffle %rdi, 256, %rsp, 256 - scrypt_shuffle %rdi, 320, %rsp, 320 - - movdqa 64(%rsp), %xmm0 - movdqa 80(%rsp), %xmm1 - movdqa 96(%rsp), %xmm2 - movdqa 112(%rsp), %xmm3 - movdqa 128+64(%rsp), %xmm8 - movdqa 128+80(%rsp), %xmm9 - movdqa 128+96(%rsp), %xmm10 - movdqa 128+112(%rsp), %xmm11 - movdqa 256+64(%rsp), %xmm12 - movdqa 256+80(%rsp), %xmm13 - movdqa 256+96(%rsp), %xmm14 - movdqa 256+112(%rsp), %xmm15 - - movq %rsi, %rbx - leaq (%r8, %r8, 2), %rax - shlq $7, %rax - addq %rsi, %rax -scrypt_core_3way_avx_loop1: - movdqa %xmm0, 64(%rbx) - movdqa %xmm1, 80(%rbx) - movdqa %xmm2, 96(%rbx) - movdqa %xmm3, 112(%rbx) - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - movdqa %xmm8, 128+64(%rbx) - movdqa %xmm9, 128+80(%rbx) - movdqa %xmm10, 128+96(%rbx) - movdqa %xmm11, 128+112(%rbx) - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - movdqa %xmm12, 256+64(%rbx) - movdqa %xmm13, 256+80(%rbx) - movdqa %xmm14, 256+96(%rbx) - movdqa %xmm15, 256+112(%rbx) - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 
- pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rbx) - movdqa %xmm1, 16(%rbx) - movdqa %xmm2, 32(%rbx) - movdqa %xmm3, 48(%rbx) - movdqa %xmm8, 128+0(%rbx) - movdqa %xmm9, 128+16(%rbx) - movdqa %xmm10, 128+32(%rbx) - movdqa %xmm11, 128+48(%rbx) - movdqa %xmm12, 256+0(%rbx) - movdqa %xmm13, 256+16(%rbx) - movdqa %xmm14, 256+32(%rbx) - movdqa %xmm15, 256+48(%rbx) - - salsa8_core_3way_avx - paddd 0(%rbx), %xmm0 - paddd 16(%rbx), %xmm1 - paddd 32(%rbx), %xmm2 - paddd 48(%rbx), %xmm3 - paddd 128+0(%rbx), %xmm8 - paddd 128+16(%rbx), %xmm9 - paddd 128+32(%rbx), %xmm10 - paddd 128+48(%rbx), %xmm11 - paddd 256+0(%rbx), %xmm12 - paddd 256+16(%rbx), %xmm13 - paddd 256+32(%rbx), %xmm14 - paddd 256+48(%rbx), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rbx), %xmm0 - pxor 80(%rbx), %xmm1 - pxor 96(%rbx), %xmm2 - pxor 112(%rbx), %xmm3 - pxor 128+64(%rbx), %xmm8 - pxor 128+80(%rbx), %xmm9 - pxor 128+96(%rbx), %xmm10 - pxor 128+112(%rbx), %xmm11 - pxor 256+64(%rbx), %xmm12 - pxor 256+80(%rbx), %xmm13 - pxor 256+96(%rbx), %xmm14 - pxor 256+112(%rbx), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_avx - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - 
paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - - addq $3*128, %rbx - cmpq %rax, %rbx - jne scrypt_core_3way_avx_loop1 - - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - movq %r8, %rcx - subq $1, %r8 -scrypt_core_3way_avx_loop2: - movd %xmm0, %ebp - movd %xmm8, %ebx - movd %xmm12, %eax - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 - pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - andl %r8d, %ebp - leaq (%rbp, %rbp, 2), %rbp - shll $7, %ebp - andl %r8d, %ebx - leaq 1(%rbx, %rbx, 2), %rbx - shll $7, %ebx - andl %r8d, %eax - leaq 2(%rax, %rax, 2), %rax - shll $7, %eax - pxor 0(%rsi, %rbp), %xmm0 - pxor 16(%rsi, %rbp), %xmm1 - pxor 32(%rsi, %rbp), %xmm2 - pxor 48(%rsi, %rbp), %xmm3 - pxor 0(%rsi, %rbx), %xmm8 - pxor 16(%rsi, %rbx), %xmm9 - pxor 32(%rsi, %rbx), %xmm10 - pxor 48(%rsi, %rbx), %xmm11 - pxor 0(%rsi, %rax), %xmm12 - pxor 16(%rsi, %rax), %xmm13 - pxor 32(%rsi, %rax), %xmm14 - pxor 48(%rsi, %rax), %xmm15 - - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - salsa8_core_3way_avx - paddd 0(%rsp), %xmm0 - paddd 16(%rsp), %xmm1 - paddd 32(%rsp), %xmm2 - paddd 48(%rsp), %xmm3 - paddd 128+0(%rsp), %xmm8 - paddd 128+16(%rsp), %xmm9 - paddd 
128+32(%rsp), %xmm10 - paddd 128+48(%rsp), %xmm11 - paddd 256+0(%rsp), %xmm12 - paddd 256+16(%rsp), %xmm13 - paddd 256+32(%rsp), %xmm14 - paddd 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rsi, %rbp), %xmm0 - pxor 80(%rsi, %rbp), %xmm1 - pxor 96(%rsi, %rbp), %xmm2 - pxor 112(%rsi, %rbp), %xmm3 - pxor 64(%rsi, %rbx), %xmm8 - pxor 80(%rsi, %rbx), %xmm9 - pxor 96(%rsi, %rbx), %xmm10 - pxor 112(%rsi, %rbx), %xmm11 - pxor 64(%rsi, %rax), %xmm12 - pxor 80(%rsi, %rax), %xmm13 - pxor 96(%rsi, %rax), %xmm14 - pxor 112(%rsi, %rax), %xmm15 - pxor 64(%rsp), %xmm0 - pxor 80(%rsp), %xmm1 - pxor 96(%rsp), %xmm2 - pxor 112(%rsp), %xmm3 - pxor 128+64(%rsp), %xmm8 - pxor 128+80(%rsp), %xmm9 - pxor 128+96(%rsp), %xmm10 - pxor 128+112(%rsp), %xmm11 - pxor 256+64(%rsp), %xmm12 - pxor 256+80(%rsp), %xmm13 - pxor 256+96(%rsp), %xmm14 - pxor 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_avx - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) 
- movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - subq $1, %rcx - ja scrypt_core_3way_avx_loop2 - - scrypt_shuffle %rsp, 0, %rdi, 0 - scrypt_shuffle %rsp, 64, %rdi, 64 - scrypt_shuffle %rsp, 128, %rdi, 128 - scrypt_shuffle %rsp, 192, %rdi, 192 - scrypt_shuffle %rsp, 256, %rdi, 256 - scrypt_shuffle %rsp, 320, %rdi, 320 - - scrypt_core_3way_cleanup - ret - -#if defined(USE_XOP) -.macro salsa8_core_3way_xop_doubleround - vpaddd %xmm0, %xmm1, %xmm4 - vpaddd %xmm8, %xmm9, %xmm6 - vpaddd %xmm12, %xmm13, %xmm7 - vprotd $7, %xmm4, %xmm4 - vprotd $7, %xmm6, %xmm6 - vprotd $7, %xmm7, %xmm7 - vpxor %xmm4, %xmm3, %xmm3 - vpxor %xmm6, %xmm11, %xmm11 - vpxor %xmm7, %xmm15, %xmm15 - - vpaddd %xmm3, %xmm0, %xmm4 - vpaddd %xmm11, %xmm8, %xmm6 - vpaddd %xmm15, %xmm12, %xmm7 - vprotd $9, %xmm4, %xmm4 - vprotd $9, %xmm6, %xmm6 - vprotd $9, %xmm7, %xmm7 - vpxor %xmm4, %xmm2, %xmm2 - vpxor %xmm6, %xmm10, %xmm10 - vpxor %xmm7, %xmm14, %xmm14 - - vpaddd %xmm2, %xmm3, %xmm4 - vpaddd %xmm10, %xmm11, %xmm6 - vpaddd %xmm14, %xmm15, %xmm7 - vprotd $13, %xmm4, %xmm4 - vprotd $13, %xmm6, %xmm6 - vprotd $13, %xmm7, %xmm7 - vpshufd $0x93, %xmm3, %xmm3 - vpshufd $0x93, %xmm11, %xmm11 - vpshufd $0x93, %xmm15, %xmm15 - vpxor %xmm4, %xmm1, %xmm1 - vpxor %xmm6, %xmm9, %xmm9 - vpxor %xmm7, %xmm13, %xmm13 - - vpaddd %xmm1, %xmm2, %xmm4 - vpaddd %xmm9, %xmm10, %xmm6 - vpaddd %xmm13, %xmm14, %xmm7 - vprotd $18, %xmm4, %xmm4 - vprotd $18, %xmm6, %xmm6 - vprotd $18, %xmm7, %xmm7 - vpshufd $0x4e, %xmm2, %xmm2 - vpshufd $0x4e, %xmm10, %xmm10 - vpshufd $0x4e, %xmm14, %xmm14 - vpxor %xmm6, %xmm8, %xmm8 - vpxor %xmm4, %xmm0, %xmm0 - vpxor %xmm7, %xmm12, %xmm12 - - vpaddd %xmm0, %xmm3, %xmm4 - vpaddd %xmm8, %xmm11, %xmm6 - vpaddd %xmm12, %xmm15, %xmm7 - vprotd $7, %xmm4, %xmm4 - vprotd $7, %xmm6, %xmm6 - vprotd $7, %xmm7, %xmm7 - vpshufd $0x39, %xmm1, 
%xmm1 - vpshufd $0x39, %xmm9, %xmm9 - vpshufd $0x39, %xmm13, %xmm13 - vpxor %xmm4, %xmm1, %xmm1 - vpxor %xmm6, %xmm9, %xmm9 - vpxor %xmm7, %xmm13, %xmm13 - - vpaddd %xmm1, %xmm0, %xmm4 - vpaddd %xmm9, %xmm8, %xmm6 - vpaddd %xmm13, %xmm12, %xmm7 - vprotd $9, %xmm4, %xmm4 - vprotd $9, %xmm6, %xmm6 - vprotd $9, %xmm7, %xmm7 - vpxor %xmm4, %xmm2, %xmm2 - vpxor %xmm6, %xmm10, %xmm10 - vpxor %xmm7, %xmm14, %xmm14 - - vpaddd %xmm2, %xmm1, %xmm4 - vpaddd %xmm10, %xmm9, %xmm6 - vpaddd %xmm14, %xmm13, %xmm7 - vprotd $13, %xmm4, %xmm4 - vprotd $13, %xmm6, %xmm6 - vprotd $13, %xmm7, %xmm7 - vpshufd $0x93, %xmm1, %xmm1 - vpshufd $0x93, %xmm9, %xmm9 - vpshufd $0x93, %xmm13, %xmm13 - vpxor %xmm4, %xmm3, %xmm3 - vpxor %xmm6, %xmm11, %xmm11 - vpxor %xmm7, %xmm15, %xmm15 - - vpaddd %xmm3, %xmm2, %xmm4 - vpaddd %xmm11, %xmm10, %xmm6 - vpaddd %xmm15, %xmm14, %xmm7 - vprotd $18, %xmm4, %xmm4 - vprotd $18, %xmm6, %xmm6 - vprotd $18, %xmm7, %xmm7 - vpshufd $0x4e, %xmm2, %xmm2 - vpshufd $0x4e, %xmm10, %xmm10 - vpshufd $0x4e, %xmm14, %xmm14 - vpxor %xmm4, %xmm0, %xmm0 - vpxor %xmm6, %xmm8, %xmm8 - vpxor %xmm7, %xmm12, %xmm12 - vpshufd $0x39, %xmm3, %xmm3 - vpshufd $0x39, %xmm11, %xmm11 - vpshufd $0x39, %xmm15, %xmm15 -.endm - -.macro salsa8_core_3way_xop - salsa8_core_3way_xop_doubleround - salsa8_core_3way_xop_doubleround - salsa8_core_3way_xop_doubleround - salsa8_core_3way_xop_doubleround -.endm - - .p2align 6 -scrypt_core_3way_xop: - scrypt_shuffle %rdi, 0, %rsp, 0 - scrypt_shuffle %rdi, 64, %rsp, 64 - scrypt_shuffle %rdi, 128, %rsp, 128 - scrypt_shuffle %rdi, 192, %rsp, 192 - scrypt_shuffle %rdi, 256, %rsp, 256 - scrypt_shuffle %rdi, 320, %rsp, 320 - - movdqa 64(%rsp), %xmm0 - movdqa 80(%rsp), %xmm1 - movdqa 96(%rsp), %xmm2 - movdqa 112(%rsp), %xmm3 - movdqa 128+64(%rsp), %xmm8 - movdqa 128+80(%rsp), %xmm9 - movdqa 128+96(%rsp), %xmm10 - movdqa 128+112(%rsp), %xmm11 - movdqa 256+64(%rsp), %xmm12 - movdqa 256+80(%rsp), %xmm13 - movdqa 256+96(%rsp), %xmm14 - movdqa 256+112(%rsp), %xmm15 
- - movq %rsi, %rbx - leaq (%r8, %r8, 2), %rax - shlq $7, %rax - addq %rsi, %rax -scrypt_core_3way_xop_loop1: - movdqa %xmm0, 64(%rbx) - movdqa %xmm1, 80(%rbx) - movdqa %xmm2, 96(%rbx) - movdqa %xmm3, 112(%rbx) - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - movdqa %xmm8, 128+64(%rbx) - movdqa %xmm9, 128+80(%rbx) - movdqa %xmm10, 128+96(%rbx) - movdqa %xmm11, 128+112(%rbx) - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - movdqa %xmm12, 256+64(%rbx) - movdqa %xmm13, 256+80(%rbx) - movdqa %xmm14, 256+96(%rbx) - movdqa %xmm15, 256+112(%rbx) - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 - pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rbx) - movdqa %xmm1, 16(%rbx) - movdqa %xmm2, 32(%rbx) - movdqa %xmm3, 48(%rbx) - movdqa %xmm8, 128+0(%rbx) - movdqa %xmm9, 128+16(%rbx) - movdqa %xmm10, 128+32(%rbx) - movdqa %xmm11, 128+48(%rbx) - movdqa %xmm12, 256+0(%rbx) - movdqa %xmm13, 256+16(%rbx) - movdqa %xmm14, 256+32(%rbx) - movdqa %xmm15, 256+48(%rbx) - - salsa8_core_3way_xop - paddd 0(%rbx), %xmm0 - paddd 16(%rbx), %xmm1 - paddd 32(%rbx), %xmm2 - paddd 48(%rbx), %xmm3 - paddd 128+0(%rbx), %xmm8 - paddd 128+16(%rbx), %xmm9 - paddd 128+32(%rbx), %xmm10 - paddd 128+48(%rbx), %xmm11 - paddd 256+0(%rbx), %xmm12 - paddd 256+16(%rbx), %xmm13 - paddd 256+32(%rbx), %xmm14 - paddd 256+48(%rbx), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rbx), %xmm0 - pxor 80(%rbx), %xmm1 - pxor 96(%rbx), %xmm2 - pxor 112(%rbx), %xmm3 - pxor 128+64(%rbx), %xmm8 - pxor 128+80(%rbx), %xmm9 - pxor 128+96(%rbx), %xmm10 - pxor 128+112(%rbx), %xmm11 - pxor 
256+64(%rbx), %xmm12 - pxor 256+80(%rbx), %xmm13 - pxor 256+96(%rbx), %xmm14 - pxor 256+112(%rbx), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_xop - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - - addq $3*128, %rbx - cmpq %rax, %rbx - jne scrypt_core_3way_xop_loop1 - - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - movq %r8, %rcx - subq $1, %r8 -scrypt_core_3way_xop_loop2: - movd %xmm0, %ebp - movd %xmm8, %ebx - movd %xmm12, %eax - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 - pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - andl %r8d, %ebp - leaq (%rbp, %rbp, 2), %rbp - shll $7, %ebp - andl %r8d, %ebx - leaq 1(%rbx, %rbx, 2), %rbx - shll $7, %ebx - andl %r8d, %eax - leaq 2(%rax, %rax, 2), %rax - shll $7, %eax - pxor 0(%rsi, %rbp), %xmm0 - pxor 16(%rsi, %rbp), %xmm1 - pxor 32(%rsi, %rbp), %xmm2 - pxor 48(%rsi, %rbp), %xmm3 - pxor 0(%rsi, %rbx), %xmm8 - pxor 16(%rsi, 
%rbx), %xmm9 - pxor 32(%rsi, %rbx), %xmm10 - pxor 48(%rsi, %rbx), %xmm11 - pxor 0(%rsi, %rax), %xmm12 - pxor 16(%rsi, %rax), %xmm13 - pxor 32(%rsi, %rax), %xmm14 - pxor 48(%rsi, %rax), %xmm15 - - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - salsa8_core_3way_xop - paddd 0(%rsp), %xmm0 - paddd 16(%rsp), %xmm1 - paddd 32(%rsp), %xmm2 - paddd 48(%rsp), %xmm3 - paddd 128+0(%rsp), %xmm8 - paddd 128+16(%rsp), %xmm9 - paddd 128+32(%rsp), %xmm10 - paddd 128+48(%rsp), %xmm11 - paddd 256+0(%rsp), %xmm12 - paddd 256+16(%rsp), %xmm13 - paddd 256+32(%rsp), %xmm14 - paddd 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rsi, %rbp), %xmm0 - pxor 80(%rsi, %rbp), %xmm1 - pxor 96(%rsi, %rbp), %xmm2 - pxor 112(%rsi, %rbp), %xmm3 - pxor 64(%rsi, %rbx), %xmm8 - pxor 80(%rsi, %rbx), %xmm9 - pxor 96(%rsi, %rbx), %xmm10 - pxor 112(%rsi, %rbx), %xmm11 - pxor 64(%rsi, %rax), %xmm12 - pxor 80(%rsi, %rax), %xmm13 - pxor 96(%rsi, %rax), %xmm14 - pxor 112(%rsi, %rax), %xmm15 - pxor 64(%rsp), %xmm0 - pxor 80(%rsp), %xmm1 - pxor 96(%rsp), %xmm2 - pxor 112(%rsp), %xmm3 - pxor 128+64(%rsp), %xmm8 - pxor 128+80(%rsp), %xmm9 - pxor 128+96(%rsp), %xmm10 - pxor 128+112(%rsp), %xmm11 - pxor 256+64(%rsp), %xmm12 - pxor 256+80(%rsp), %xmm13 - pxor 256+96(%rsp), %xmm14 - pxor 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa 
%xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_xop - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - subq $1, %rcx - ja scrypt_core_3way_xop_loop2 - - scrypt_shuffle %rsp, 0, %rdi, 0 - scrypt_shuffle %rsp, 64, %rdi, 64 - scrypt_shuffle %rsp, 128, %rdi, 128 - scrypt_shuffle %rsp, 192, %rdi, 192 - scrypt_shuffle %rsp, 256, %rdi, 256 - scrypt_shuffle %rsp, 320, %rdi, 320 - - scrypt_core_3way_cleanup - ret -#endif /* USE_XOP */ -#endif /* USE_AVX */ - -.macro salsa8_core_3way_xmm_doubleround - movdqa %xmm1, %xmm4 - movdqa %xmm9, %xmm6 - movdqa %xmm13, %xmm7 - paddd %xmm0, %xmm4 - paddd %xmm8, %xmm6 - paddd %xmm12, %xmm7 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm3 - pxor %xmm5, %xmm3 - movdqa %xmm0, %xmm4 - movdqa %xmm6, %xmm5 - pslld $7, %xmm6 - psrld $25, %xmm5 - pxor %xmm6, %xmm11 - pxor %xmm5, %xmm11 - movdqa %xmm8, %xmm6 - movdqa %xmm7, %xmm5 - pslld $7, %xmm7 - psrld $25, %xmm5 - pxor %xmm7, %xmm15 - pxor %xmm5, %xmm15 - movdqa %xmm12, %xmm7 - - paddd %xmm3, %xmm4 - paddd %xmm11, %xmm6 - paddd %xmm15, %xmm7 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm3, %xmm4 - pshufd 
$0x93, %xmm3, %xmm3 - pxor %xmm5, %xmm2 - movdqa %xmm6, %xmm5 - pslld $9, %xmm6 - psrld $23, %xmm5 - pxor %xmm6, %xmm10 - movdqa %xmm11, %xmm6 - pshufd $0x93, %xmm11, %xmm11 - pxor %xmm5, %xmm10 - movdqa %xmm7, %xmm5 - pslld $9, %xmm7 - psrld $23, %xmm5 - pxor %xmm7, %xmm14 - movdqa %xmm15, %xmm7 - pxor %xmm5, %xmm14 - pshufd $0x93, %xmm15, %xmm15 - - paddd %xmm2, %xmm4 - paddd %xmm10, %xmm6 - paddd %xmm14, %xmm7 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm2, %xmm4 - pshufd $0x4e, %xmm2, %xmm2 - pxor %xmm5, %xmm1 - movdqa %xmm6, %xmm5 - pslld $13, %xmm6 - psrld $19, %xmm5 - pxor %xmm6, %xmm9 - movdqa %xmm10, %xmm6 - pshufd $0x4e, %xmm10, %xmm10 - pxor %xmm5, %xmm9 - movdqa %xmm7, %xmm5 - pslld $13, %xmm7 - psrld $19, %xmm5 - pxor %xmm7, %xmm13 - movdqa %xmm14, %xmm7 - pshufd $0x4e, %xmm14, %xmm14 - pxor %xmm5, %xmm13 - - paddd %xmm1, %xmm4 - paddd %xmm9, %xmm6 - paddd %xmm13, %xmm7 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - pshufd $0x39, %xmm1, %xmm1 - pxor %xmm5, %xmm0 - movdqa %xmm3, %xmm4 - movdqa %xmm6, %xmm5 - pslld $18, %xmm6 - psrld $14, %xmm5 - pxor %xmm6, %xmm8 - pshufd $0x39, %xmm9, %xmm9 - pxor %xmm5, %xmm8 - movdqa %xmm11, %xmm6 - movdqa %xmm7, %xmm5 - pslld $18, %xmm7 - psrld $14, %xmm5 - pxor %xmm7, %xmm12 - movdqa %xmm15, %xmm7 - pxor %xmm5, %xmm12 - pshufd $0x39, %xmm13, %xmm13 - - paddd %xmm0, %xmm4 - paddd %xmm8, %xmm6 - paddd %xmm12, %xmm7 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm1 - pxor %xmm5, %xmm1 - movdqa %xmm0, %xmm4 - movdqa %xmm6, %xmm5 - pslld $7, %xmm6 - psrld $25, %xmm5 - pxor %xmm6, %xmm9 - pxor %xmm5, %xmm9 - movdqa %xmm8, %xmm6 - movdqa %xmm7, %xmm5 - pslld $7, %xmm7 - psrld $25, %xmm5 - pxor %xmm7, %xmm13 - pxor %xmm5, %xmm13 - movdqa %xmm12, %xmm7 - - paddd %xmm1, %xmm4 - paddd %xmm9, %xmm6 - paddd %xmm13, %xmm7 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm1, 
%xmm4 - pshufd $0x93, %xmm1, %xmm1 - pxor %xmm5, %xmm2 - movdqa %xmm6, %xmm5 - pslld $9, %xmm6 - psrld $23, %xmm5 - pxor %xmm6, %xmm10 - movdqa %xmm9, %xmm6 - pshufd $0x93, %xmm9, %xmm9 - pxor %xmm5, %xmm10 - movdqa %xmm7, %xmm5 - pslld $9, %xmm7 - psrld $23, %xmm5 - pxor %xmm7, %xmm14 - movdqa %xmm13, %xmm7 - pshufd $0x93, %xmm13, %xmm13 - pxor %xmm5, %xmm14 - - paddd %xmm2, %xmm4 - paddd %xmm10, %xmm6 - paddd %xmm14, %xmm7 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - pshufd $0x4e, %xmm2, %xmm2 - pxor %xmm5, %xmm3 - movdqa %xmm6, %xmm5 - pslld $13, %xmm6 - psrld $19, %xmm5 - pxor %xmm6, %xmm11 - movdqa %xmm10, %xmm6 - pshufd $0x4e, %xmm10, %xmm10 - pxor %xmm5, %xmm11 - movdqa %xmm7, %xmm5 - pslld $13, %xmm7 - psrld $19, %xmm5 - pxor %xmm7, %xmm15 - movdqa %xmm14, %xmm7 - pshufd $0x4e, %xmm14, %xmm14 - pxor %xmm5, %xmm15 - - paddd %xmm3, %xmm4 - paddd %xmm11, %xmm6 - paddd %xmm15, %xmm7 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - pshufd $0x39, %xmm3, %xmm3 - pxor %xmm5, %xmm0 - movdqa %xmm6, %xmm5 - pslld $18, %xmm6 - psrld $14, %xmm5 - pxor %xmm6, %xmm8 - pshufd $0x39, %xmm11, %xmm11 - pxor %xmm5, %xmm8 - movdqa %xmm7, %xmm5 - pslld $18, %xmm7 - psrld $14, %xmm5 - pxor %xmm7, %xmm12 - pshufd $0x39, %xmm15, %xmm15 - pxor %xmm5, %xmm12 -.endm - -.macro salsa8_core_3way_xmm - salsa8_core_3way_xmm_doubleround - salsa8_core_3way_xmm_doubleround - salsa8_core_3way_xmm_doubleround - salsa8_core_3way_xmm_doubleround -.endm - - .p2align 6 -scrypt_core_3way_xmm: - scrypt_shuffle %rdi, 0, %rsp, 0 - scrypt_shuffle %rdi, 64, %rsp, 64 - scrypt_shuffle %rdi, 128, %rsp, 128 - scrypt_shuffle %rdi, 192, %rsp, 192 - scrypt_shuffle %rdi, 256, %rsp, 256 - scrypt_shuffle %rdi, 320, %rsp, 320 - - movdqa 64(%rsp), %xmm0 - movdqa 80(%rsp), %xmm1 - movdqa 96(%rsp), %xmm2 - movdqa 112(%rsp), %xmm3 - movdqa 128+64(%rsp), %xmm8 - movdqa 128+80(%rsp), %xmm9 - movdqa 128+96(%rsp), %xmm10 - movdqa 
128+112(%rsp), %xmm11 - movdqa 256+64(%rsp), %xmm12 - movdqa 256+80(%rsp), %xmm13 - movdqa 256+96(%rsp), %xmm14 - movdqa 256+112(%rsp), %xmm15 - - movq %rsi, %rbx - leaq (%r8, %r8, 2), %rax - shlq $7, %rax - addq %rsi, %rax -scrypt_core_3way_xmm_loop1: - movdqa %xmm0, 64(%rbx) - movdqa %xmm1, 80(%rbx) - movdqa %xmm2, 96(%rbx) - movdqa %xmm3, 112(%rbx) - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - movdqa %xmm8, 128+64(%rbx) - movdqa %xmm9, 128+80(%rbx) - movdqa %xmm10, 128+96(%rbx) - movdqa %xmm11, 128+112(%rbx) - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - movdqa %xmm12, 256+64(%rbx) - movdqa %xmm13, 256+80(%rbx) - movdqa %xmm14, 256+96(%rbx) - movdqa %xmm15, 256+112(%rbx) - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 - pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rbx) - movdqa %xmm1, 16(%rbx) - movdqa %xmm2, 32(%rbx) - movdqa %xmm3, 48(%rbx) - movdqa %xmm8, 128+0(%rbx) - movdqa %xmm9, 128+16(%rbx) - movdqa %xmm10, 128+32(%rbx) - movdqa %xmm11, 128+48(%rbx) - movdqa %xmm12, 256+0(%rbx) - movdqa %xmm13, 256+16(%rbx) - movdqa %xmm14, 256+32(%rbx) - movdqa %xmm15, 256+48(%rbx) - - salsa8_core_3way_xmm - paddd 0(%rbx), %xmm0 - paddd 16(%rbx), %xmm1 - paddd 32(%rbx), %xmm2 - paddd 48(%rbx), %xmm3 - paddd 128+0(%rbx), %xmm8 - paddd 128+16(%rbx), %xmm9 - paddd 128+32(%rbx), %xmm10 - paddd 128+48(%rbx), %xmm11 - paddd 256+0(%rbx), %xmm12 - paddd 256+16(%rbx), %xmm13 - paddd 256+32(%rbx), %xmm14 - paddd 256+48(%rbx), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rbx), %xmm0 - pxor 80(%rbx), %xmm1 - pxor 96(%rbx), %xmm2 - 
pxor 112(%rbx), %xmm3 - pxor 128+64(%rbx), %xmm8 - pxor 128+80(%rbx), %xmm9 - pxor 128+96(%rbx), %xmm10 - pxor 128+112(%rbx), %xmm11 - pxor 256+64(%rbx), %xmm12 - pxor 256+80(%rbx), %xmm13 - pxor 256+96(%rbx), %xmm14 - pxor 256+112(%rbx), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_xmm - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - - addq $3*128, %rbx - cmpq %rax, %rbx - jne scrypt_core_3way_xmm_loop1 - - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - movq %r8, %rcx - subq $1, %r8 -scrypt_core_3way_xmm_loop2: - movd %xmm0, %ebp - movd %xmm8, %ebx - movd %xmm12, %eax - pxor 0(%rsp), %xmm0 - pxor 16(%rsp), %xmm1 - pxor 32(%rsp), %xmm2 - pxor 48(%rsp), %xmm3 - pxor 128+0(%rsp), %xmm8 - pxor 128+16(%rsp), %xmm9 - pxor 128+32(%rsp), %xmm10 - pxor 128+48(%rsp), %xmm11 - pxor 256+0(%rsp), %xmm12 - pxor 256+16(%rsp), %xmm13 - pxor 256+32(%rsp), %xmm14 - pxor 256+48(%rsp), %xmm15 - andl %r8d, %ebp - leaq (%rbp, %rbp, 2), %rbp - shll $7, %ebp - andl %r8d, %ebx - leaq 1(%rbx, %rbx, 2), %rbx - shll $7, %ebx - andl %r8d, %eax - leaq 2(%rax, %rax, 2), %rax - shll $7, %eax - pxor 0(%rsi, %rbp), 
%xmm0 - pxor 16(%rsi, %rbp), %xmm1 - pxor 32(%rsi, %rbp), %xmm2 - pxor 48(%rsi, %rbp), %xmm3 - pxor 0(%rsi, %rbx), %xmm8 - pxor 16(%rsi, %rbx), %xmm9 - pxor 32(%rsi, %rbx), %xmm10 - pxor 48(%rsi, %rbx), %xmm11 - pxor 0(%rsi, %rax), %xmm12 - pxor 16(%rsi, %rax), %xmm13 - pxor 32(%rsi, %rax), %xmm14 - pxor 48(%rsi, %rax), %xmm15 - - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - salsa8_core_3way_xmm - paddd 0(%rsp), %xmm0 - paddd 16(%rsp), %xmm1 - paddd 32(%rsp), %xmm2 - paddd 48(%rsp), %xmm3 - paddd 128+0(%rsp), %xmm8 - paddd 128+16(%rsp), %xmm9 - paddd 128+32(%rsp), %xmm10 - paddd 128+48(%rsp), %xmm11 - paddd 256+0(%rsp), %xmm12 - paddd 256+16(%rsp), %xmm13 - paddd 256+32(%rsp), %xmm14 - paddd 256+48(%rsp), %xmm15 - movdqa %xmm0, 0(%rsp) - movdqa %xmm1, 16(%rsp) - movdqa %xmm2, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm8, 128+0(%rsp) - movdqa %xmm9, 128+16(%rsp) - movdqa %xmm10, 128+32(%rsp) - movdqa %xmm11, 128+48(%rsp) - movdqa %xmm12, 256+0(%rsp) - movdqa %xmm13, 256+16(%rsp) - movdqa %xmm14, 256+32(%rsp) - movdqa %xmm15, 256+48(%rsp) - - pxor 64(%rsi, %rbp), %xmm0 - pxor 80(%rsi, %rbp), %xmm1 - pxor 96(%rsi, %rbp), %xmm2 - pxor 112(%rsi, %rbp), %xmm3 - pxor 64(%rsi, %rbx), %xmm8 - pxor 80(%rsi, %rbx), %xmm9 - pxor 96(%rsi, %rbx), %xmm10 - pxor 112(%rsi, %rbx), %xmm11 - pxor 64(%rsi, %rax), %xmm12 - pxor 80(%rsi, %rax), %xmm13 - pxor 96(%rsi, %rax), %xmm14 - pxor 112(%rsi, %rax), %xmm15 - pxor 64(%rsp), %xmm0 - pxor 80(%rsp), %xmm1 - pxor 96(%rsp), %xmm2 - pxor 112(%rsp), %xmm3 - pxor 128+64(%rsp), %xmm8 - pxor 128+80(%rsp), %xmm9 - pxor 128+96(%rsp), %xmm10 - pxor 128+112(%rsp), %xmm11 - pxor 256+64(%rsp), %xmm12 - pxor 256+80(%rsp), %xmm13 - pxor 256+96(%rsp), %xmm14 - 
pxor 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - salsa8_core_3way_xmm - paddd 64(%rsp), %xmm0 - paddd 80(%rsp), %xmm1 - paddd 96(%rsp), %xmm2 - paddd 112(%rsp), %xmm3 - paddd 128+64(%rsp), %xmm8 - paddd 128+80(%rsp), %xmm9 - paddd 128+96(%rsp), %xmm10 - paddd 128+112(%rsp), %xmm11 - paddd 256+64(%rsp), %xmm12 - paddd 256+80(%rsp), %xmm13 - paddd 256+96(%rsp), %xmm14 - paddd 256+112(%rsp), %xmm15 - movdqa %xmm0, 64(%rsp) - movdqa %xmm1, 80(%rsp) - movdqa %xmm2, 96(%rsp) - movdqa %xmm3, 112(%rsp) - movdqa %xmm8, 128+64(%rsp) - movdqa %xmm9, 128+80(%rsp) - movdqa %xmm10, 128+96(%rsp) - movdqa %xmm11, 128+112(%rsp) - movdqa %xmm12, 256+64(%rsp) - movdqa %xmm13, 256+80(%rsp) - movdqa %xmm14, 256+96(%rsp) - movdqa %xmm15, 256+112(%rsp) - - subq $1, %rcx - ja scrypt_core_3way_xmm_loop2 - - scrypt_shuffle %rsp, 0, %rdi, 0 - scrypt_shuffle %rsp, 64, %rdi, 64 - scrypt_shuffle %rsp, 128, %rdi, 128 - scrypt_shuffle %rsp, 192, %rdi, 192 - scrypt_shuffle %rsp, 256, %rdi, 256 - scrypt_shuffle %rsp, 320, %rdi, 320 - - scrypt_core_3way_cleanup - ret - - -#if defined(USE_AVX2) - -.macro salsa8_core_6way_avx2_doubleround - vpaddd %ymm0, %ymm1, %ymm4 - vpaddd %ymm8, %ymm9, %ymm6 - vpaddd %ymm12, %ymm13, %ymm7 - vpslld $7, %ymm4, %ymm5 - vpsrld $25, %ymm4, %ymm4 - vpxor %ymm5, %ymm3, %ymm3 - vpxor %ymm4, %ymm3, %ymm3 - vpslld $7, %ymm6, %ymm5 - vpsrld $25, %ymm6, %ymm6 - vpxor %ymm5, %ymm11, %ymm11 - vpxor %ymm6, %ymm11, %ymm11 - vpslld $7, %ymm7, %ymm5 - vpsrld $25, %ymm7, %ymm7 - vpxor %ymm5, %ymm15, %ymm15 - vpxor %ymm7, %ymm15, %ymm15 - - vpaddd %ymm3, %ymm0, %ymm4 - vpaddd %ymm11, %ymm8, %ymm6 - vpaddd %ymm15, %ymm12, %ymm7 - vpslld $9, %ymm4, %ymm5 - vpsrld $23, 
%ymm4, %ymm4 - vpxor %ymm5, %ymm2, %ymm2 - vpxor %ymm4, %ymm2, %ymm2 - vpslld $9, %ymm6, %ymm5 - vpsrld $23, %ymm6, %ymm6 - vpxor %ymm5, %ymm10, %ymm10 - vpxor %ymm6, %ymm10, %ymm10 - vpslld $9, %ymm7, %ymm5 - vpsrld $23, %ymm7, %ymm7 - vpxor %ymm5, %ymm14, %ymm14 - vpxor %ymm7, %ymm14, %ymm14 - - vpaddd %ymm2, %ymm3, %ymm4 - vpaddd %ymm10, %ymm11, %ymm6 - vpaddd %ymm14, %ymm15, %ymm7 - vpslld $13, %ymm4, %ymm5 - vpsrld $19, %ymm4, %ymm4 - vpshufd $0x93, %ymm3, %ymm3 - vpshufd $0x93, %ymm11, %ymm11 - vpshufd $0x93, %ymm15, %ymm15 - vpxor %ymm5, %ymm1, %ymm1 - vpxor %ymm4, %ymm1, %ymm1 - vpslld $13, %ymm6, %ymm5 - vpsrld $19, %ymm6, %ymm6 - vpxor %ymm5, %ymm9, %ymm9 - vpxor %ymm6, %ymm9, %ymm9 - vpslld $13, %ymm7, %ymm5 - vpsrld $19, %ymm7, %ymm7 - vpxor %ymm5, %ymm13, %ymm13 - vpxor %ymm7, %ymm13, %ymm13 - - vpaddd %ymm1, %ymm2, %ymm4 - vpaddd %ymm9, %ymm10, %ymm6 - vpaddd %ymm13, %ymm14, %ymm7 - vpslld $18, %ymm4, %ymm5 - vpsrld $14, %ymm4, %ymm4 - vpshufd $0x4e, %ymm2, %ymm2 - vpshufd $0x4e, %ymm10, %ymm10 - vpshufd $0x4e, %ymm14, %ymm14 - vpxor %ymm5, %ymm0, %ymm0 - vpxor %ymm4, %ymm0, %ymm0 - vpslld $18, %ymm6, %ymm5 - vpsrld $14, %ymm6, %ymm6 - vpxor %ymm5, %ymm8, %ymm8 - vpxor %ymm6, %ymm8, %ymm8 - vpslld $18, %ymm7, %ymm5 - vpsrld $14, %ymm7, %ymm7 - vpxor %ymm5, %ymm12, %ymm12 - vpxor %ymm7, %ymm12, %ymm12 - - vpaddd %ymm0, %ymm3, %ymm4 - vpaddd %ymm8, %ymm11, %ymm6 - vpaddd %ymm12, %ymm15, %ymm7 - vpslld $7, %ymm4, %ymm5 - vpsrld $25, %ymm4, %ymm4 - vpshufd $0x39, %ymm1, %ymm1 - vpxor %ymm5, %ymm1, %ymm1 - vpxor %ymm4, %ymm1, %ymm1 - vpslld $7, %ymm6, %ymm5 - vpsrld $25, %ymm6, %ymm6 - vpshufd $0x39, %ymm9, %ymm9 - vpxor %ymm5, %ymm9, %ymm9 - vpxor %ymm6, %ymm9, %ymm9 - vpslld $7, %ymm7, %ymm5 - vpsrld $25, %ymm7, %ymm7 - vpshufd $0x39, %ymm13, %ymm13 - vpxor %ymm5, %ymm13, %ymm13 - vpxor %ymm7, %ymm13, %ymm13 - - vpaddd %ymm1, %ymm0, %ymm4 - vpaddd %ymm9, %ymm8, %ymm6 - vpaddd %ymm13, %ymm12, %ymm7 - vpslld $9, %ymm4, %ymm5 - vpsrld $23, %ymm4, %ymm4 - 
vpxor %ymm5, %ymm2, %ymm2 - vpxor %ymm4, %ymm2, %ymm2 - vpslld $9, %ymm6, %ymm5 - vpsrld $23, %ymm6, %ymm6 - vpxor %ymm5, %ymm10, %ymm10 - vpxor %ymm6, %ymm10, %ymm10 - vpslld $9, %ymm7, %ymm5 - vpsrld $23, %ymm7, %ymm7 - vpxor %ymm5, %ymm14, %ymm14 - vpxor %ymm7, %ymm14, %ymm14 - - vpaddd %ymm2, %ymm1, %ymm4 - vpaddd %ymm10, %ymm9, %ymm6 - vpaddd %ymm14, %ymm13, %ymm7 - vpslld $13, %ymm4, %ymm5 - vpsrld $19, %ymm4, %ymm4 - vpshufd $0x93, %ymm1, %ymm1 - vpshufd $0x93, %ymm9, %ymm9 - vpshufd $0x93, %ymm13, %ymm13 - vpxor %ymm5, %ymm3, %ymm3 - vpxor %ymm4, %ymm3, %ymm3 - vpslld $13, %ymm6, %ymm5 - vpsrld $19, %ymm6, %ymm6 - vpxor %ymm5, %ymm11, %ymm11 - vpxor %ymm6, %ymm11, %ymm11 - vpslld $13, %ymm7, %ymm5 - vpsrld $19, %ymm7, %ymm7 - vpxor %ymm5, %ymm15, %ymm15 - vpxor %ymm7, %ymm15, %ymm15 - - vpaddd %ymm3, %ymm2, %ymm4 - vpaddd %ymm11, %ymm10, %ymm6 - vpaddd %ymm15, %ymm14, %ymm7 - vpslld $18, %ymm4, %ymm5 - vpsrld $14, %ymm4, %ymm4 - vpshufd $0x4e, %ymm2, %ymm2 - vpshufd $0x4e, %ymm10, %ymm10 - vpxor %ymm5, %ymm0, %ymm0 - vpxor %ymm4, %ymm0, %ymm0 - vpslld $18, %ymm6, %ymm5 - vpsrld $14, %ymm6, %ymm6 - vpshufd $0x4e, %ymm14, %ymm14 - vpshufd $0x39, %ymm11, %ymm11 - vpxor %ymm5, %ymm8, %ymm8 - vpxor %ymm6, %ymm8, %ymm8 - vpslld $18, %ymm7, %ymm5 - vpsrld $14, %ymm7, %ymm7 - vpshufd $0x39, %ymm3, %ymm3 - vpshufd $0x39, %ymm15, %ymm15 - vpxor %ymm5, %ymm12, %ymm12 - vpxor %ymm7, %ymm12, %ymm12 -.endm - -.macro salsa8_core_6way_avx2 - salsa8_core_6way_avx2_doubleround - salsa8_core_6way_avx2_doubleround - salsa8_core_6way_avx2_doubleround - salsa8_core_6way_avx2_doubleround -.endm - - .text - .p2align 6 - .globl scrypt_core_6way - .globl _scrypt_core_6way -scrypt_core_6way: -_scrypt_core_6way: - pushq %rbx - pushq %rbp -#if defined(_WIN64) || defined(__CYGWIN__) - subq $176, %rsp - vmovdqa %xmm6, 8(%rsp) - vmovdqa %xmm7, 24(%rsp) - vmovdqa %xmm8, 40(%rsp) - vmovdqa %xmm9, 56(%rsp) - vmovdqa %xmm10, 72(%rsp) - vmovdqa %xmm11, 88(%rsp) - vmovdqa %xmm12, 104(%rsp) - 
vmovdqa %xmm13, 120(%rsp) - vmovdqa %xmm14, 136(%rsp) - vmovdqa %xmm15, 152(%rsp) - pushq %rdi - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi -#else - movq %rdx, %r8 -#endif - movq %rsp, %rdx - subq $768, %rsp - andq $-128, %rsp - -.macro scrypt_core_6way_cleanup - movq %rdx, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - popq %rdi - vmovdqa 8(%rsp), %xmm6 - vmovdqa 24(%rsp), %xmm7 - vmovdqa 40(%rsp), %xmm8 - vmovdqa 56(%rsp), %xmm9 - vmovdqa 72(%rsp), %xmm10 - vmovdqa 88(%rsp), %xmm11 - vmovdqa 104(%rsp), %xmm12 - vmovdqa 120(%rsp), %xmm13 - vmovdqa 136(%rsp), %xmm14 - vmovdqa 152(%rsp), %xmm15 - addq $176, %rsp -#endif - popq %rbp - popq %rbx -.endm - -.macro scrypt_shuffle_pack2 src, so, dest, do - vmovdqa \so+0*16(\src), %xmm0 - vmovdqa \so+1*16(\src), %xmm1 - vmovdqa \so+2*16(\src), %xmm2 - vmovdqa \so+3*16(\src), %xmm3 - vinserti128 $1, \so+128+0*16(\src), %ymm0, %ymm0 - vinserti128 $1, \so+128+1*16(\src), %ymm1, %ymm1 - vinserti128 $1, \so+128+2*16(\src), %ymm2, %ymm2 - vinserti128 $1, \so+128+3*16(\src), %ymm3, %ymm3 - vpblendd $0x33, %ymm0, %ymm2, %ymm4 - vpblendd $0xcc, %ymm1, %ymm3, %ymm5 - vpblendd $0x33, %ymm2, %ymm0, %ymm6 - vpblendd $0xcc, %ymm3, %ymm1, %ymm7 - vpblendd $0x55, %ymm7, %ymm6, %ymm3 - vpblendd $0x55, %ymm6, %ymm5, %ymm2 - vpblendd $0x55, %ymm5, %ymm4, %ymm1 - vpblendd $0x55, %ymm4, %ymm7, %ymm0 - vmovdqa %ymm0, \do+0*32(\dest) - vmovdqa %ymm1, \do+1*32(\dest) - vmovdqa %ymm2, \do+2*32(\dest) - vmovdqa %ymm3, \do+3*32(\dest) -.endm - -.macro scrypt_shuffle_unpack2 src, so, dest, do - vmovdqa \so+0*32(\src), %ymm0 - vmovdqa \so+1*32(\src), %ymm1 - vmovdqa \so+2*32(\src), %ymm2 - vmovdqa \so+3*32(\src), %ymm3 - vpblendd $0x33, %ymm0, %ymm2, %ymm4 - vpblendd $0xcc, %ymm1, %ymm3, %ymm5 - vpblendd $0x33, %ymm2, %ymm0, %ymm6 - vpblendd $0xcc, %ymm3, %ymm1, %ymm7 - vpblendd $0x55, %ymm7, %ymm6, %ymm3 - vpblendd $0x55, %ymm6, %ymm5, %ymm2 - vpblendd $0x55, %ymm5, %ymm4, %ymm1 - vpblendd $0x55, %ymm4, %ymm7, %ymm0 - vmovdqa 
%xmm0, \do+0*16(\dest) - vmovdqa %xmm1, \do+1*16(\dest) - vmovdqa %xmm2, \do+2*16(\dest) - vmovdqa %xmm3, \do+3*16(\dest) - vextracti128 $1, %ymm0, \do+128+0*16(\dest) - vextracti128 $1, %ymm1, \do+128+1*16(\dest) - vextracti128 $1, %ymm2, \do+128+2*16(\dest) - vextracti128 $1, %ymm3, \do+128+3*16(\dest) -.endm - -scrypt_core_6way_avx2: - scrypt_shuffle_pack2 %rdi, 0*256+0, %rsp, 0*128 - scrypt_shuffle_pack2 %rdi, 0*256+64, %rsp, 1*128 - scrypt_shuffle_pack2 %rdi, 1*256+0, %rsp, 2*128 - scrypt_shuffle_pack2 %rdi, 1*256+64, %rsp, 3*128 - scrypt_shuffle_pack2 %rdi, 2*256+0, %rsp, 4*128 - scrypt_shuffle_pack2 %rdi, 2*256+64, %rsp, 5*128 - - vmovdqa 0*256+4*32(%rsp), %ymm0 - vmovdqa 0*256+5*32(%rsp), %ymm1 - vmovdqa 0*256+6*32(%rsp), %ymm2 - vmovdqa 0*256+7*32(%rsp), %ymm3 - vmovdqa 1*256+4*32(%rsp), %ymm8 - vmovdqa 1*256+5*32(%rsp), %ymm9 - vmovdqa 1*256+6*32(%rsp), %ymm10 - vmovdqa 1*256+7*32(%rsp), %ymm11 - vmovdqa 2*256+4*32(%rsp), %ymm12 - vmovdqa 2*256+5*32(%rsp), %ymm13 - vmovdqa 2*256+6*32(%rsp), %ymm14 - vmovdqa 2*256+7*32(%rsp), %ymm15 - - movq %rsi, %rbx - leaq (%r8, %r8, 2), %rax - shlq $8, %rax - addq %rsi, %rax -scrypt_core_6way_avx2_loop1: - vmovdqa %ymm0, 0*256+4*32(%rbx) - vmovdqa %ymm1, 0*256+5*32(%rbx) - vmovdqa %ymm2, 0*256+6*32(%rbx) - vmovdqa %ymm3, 0*256+7*32(%rbx) - vpxor 0*256+0*32(%rsp), %ymm0, %ymm0 - vpxor 0*256+1*32(%rsp), %ymm1, %ymm1 - vpxor 0*256+2*32(%rsp), %ymm2, %ymm2 - vpxor 0*256+3*32(%rsp), %ymm3, %ymm3 - vmovdqa %ymm8, 1*256+4*32(%rbx) - vmovdqa %ymm9, 1*256+5*32(%rbx) - vmovdqa %ymm10, 1*256+6*32(%rbx) - vmovdqa %ymm11, 1*256+7*32(%rbx) - vpxor 1*256+0*32(%rsp), %ymm8, %ymm8 - vpxor 1*256+1*32(%rsp), %ymm9, %ymm9 - vpxor 1*256+2*32(%rsp), %ymm10, %ymm10 - vpxor 1*256+3*32(%rsp), %ymm11, %ymm11 - vmovdqa %ymm12, 2*256+4*32(%rbx) - vmovdqa %ymm13, 2*256+5*32(%rbx) - vmovdqa %ymm14, 2*256+6*32(%rbx) - vmovdqa %ymm15, 2*256+7*32(%rbx) - vpxor 2*256+0*32(%rsp), %ymm12, %ymm12 - vpxor 2*256+1*32(%rsp), %ymm13, %ymm13 - vpxor 
2*256+2*32(%rsp), %ymm14, %ymm14 - vpxor 2*256+3*32(%rsp), %ymm15, %ymm15 - vmovdqa %ymm0, 0*256+0*32(%rbx) - vmovdqa %ymm1, 0*256+1*32(%rbx) - vmovdqa %ymm2, 0*256+2*32(%rbx) - vmovdqa %ymm3, 0*256+3*32(%rbx) - vmovdqa %ymm8, 1*256+0*32(%rbx) - vmovdqa %ymm9, 1*256+1*32(%rbx) - vmovdqa %ymm10, 1*256+2*32(%rbx) - vmovdqa %ymm11, 1*256+3*32(%rbx) - vmovdqa %ymm12, 2*256+0*32(%rbx) - vmovdqa %ymm13, 2*256+1*32(%rbx) - vmovdqa %ymm14, 2*256+2*32(%rbx) - vmovdqa %ymm15, 2*256+3*32(%rbx) - - salsa8_core_6way_avx2 - vpaddd 0*256+0*32(%rbx), %ymm0, %ymm0 - vpaddd 0*256+1*32(%rbx), %ymm1, %ymm1 - vpaddd 0*256+2*32(%rbx), %ymm2, %ymm2 - vpaddd 0*256+3*32(%rbx), %ymm3, %ymm3 - vpaddd 1*256+0*32(%rbx), %ymm8, %ymm8 - vpaddd 1*256+1*32(%rbx), %ymm9, %ymm9 - vpaddd 1*256+2*32(%rbx), %ymm10, %ymm10 - vpaddd 1*256+3*32(%rbx), %ymm11, %ymm11 - vpaddd 2*256+0*32(%rbx), %ymm12, %ymm12 - vpaddd 2*256+1*32(%rbx), %ymm13, %ymm13 - vpaddd 2*256+2*32(%rbx), %ymm14, %ymm14 - vpaddd 2*256+3*32(%rbx), %ymm15, %ymm15 - vmovdqa %ymm0, 0*256+0*32(%rsp) - vmovdqa %ymm1, 0*256+1*32(%rsp) - vmovdqa %ymm2, 0*256+2*32(%rsp) - vmovdqa %ymm3, 0*256+3*32(%rsp) - vmovdqa %ymm8, 1*256+0*32(%rsp) - vmovdqa %ymm9, 1*256+1*32(%rsp) - vmovdqa %ymm10, 1*256+2*32(%rsp) - vmovdqa %ymm11, 1*256+3*32(%rsp) - vmovdqa %ymm12, 2*256+0*32(%rsp) - vmovdqa %ymm13, 2*256+1*32(%rsp) - vmovdqa %ymm14, 2*256+2*32(%rsp) - vmovdqa %ymm15, 2*256+3*32(%rsp) - - vpxor 0*256+4*32(%rbx), %ymm0, %ymm0 - vpxor 0*256+5*32(%rbx), %ymm1, %ymm1 - vpxor 0*256+6*32(%rbx), %ymm2, %ymm2 - vpxor 0*256+7*32(%rbx), %ymm3, %ymm3 - vpxor 1*256+4*32(%rbx), %ymm8, %ymm8 - vpxor 1*256+5*32(%rbx), %ymm9, %ymm9 - vpxor 1*256+6*32(%rbx), %ymm10, %ymm10 - vpxor 1*256+7*32(%rbx), %ymm11, %ymm11 - vpxor 2*256+4*32(%rbx), %ymm12, %ymm12 - vpxor 2*256+5*32(%rbx), %ymm13, %ymm13 - vpxor 2*256+6*32(%rbx), %ymm14, %ymm14 - vpxor 2*256+7*32(%rbx), %ymm15, %ymm15 - vmovdqa %ymm0, 0*256+4*32(%rsp) - vmovdqa %ymm1, 0*256+5*32(%rsp) - vmovdqa %ymm2, 
0*256+6*32(%rsp) - vmovdqa %ymm3, 0*256+7*32(%rsp) - vmovdqa %ymm8, 1*256+4*32(%rsp) - vmovdqa %ymm9, 1*256+5*32(%rsp) - vmovdqa %ymm10, 1*256+6*32(%rsp) - vmovdqa %ymm11, 1*256+7*32(%rsp) - vmovdqa %ymm12, 2*256+4*32(%rsp) - vmovdqa %ymm13, 2*256+5*32(%rsp) - vmovdqa %ymm14, 2*256+6*32(%rsp) - vmovdqa %ymm15, 2*256+7*32(%rsp) - salsa8_core_6way_avx2 - vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0 - vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1 - vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2 - vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3 - vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8 - vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9 - vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10 - vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11 - vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12 - vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13 - vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14 - vpaddd 2*256+7*32(%rsp), %ymm15, %ymm15 - - addq $6*128, %rbx - cmpq %rax, %rbx - jne scrypt_core_6way_avx2_loop1 - - vmovdqa %ymm0, 0*256+4*32(%rsp) - vmovdqa %ymm1, 0*256+5*32(%rsp) - vmovdqa %ymm2, 0*256+6*32(%rsp) - vmovdqa %ymm3, 0*256+7*32(%rsp) - vmovdqa %ymm8, 1*256+4*32(%rsp) - vmovdqa %ymm9, 1*256+5*32(%rsp) - vmovdqa %ymm10, 1*256+6*32(%rsp) - vmovdqa %ymm11, 1*256+7*32(%rsp) - vmovdqa %ymm12, 2*256+4*32(%rsp) - vmovdqa %ymm13, 2*256+5*32(%rsp) - vmovdqa %ymm14, 2*256+6*32(%rsp) - vmovdqa %ymm15, 2*256+7*32(%rsp) - - movq %r8, %rcx - leaq -1(%r8), %r11 -scrypt_core_6way_avx2_loop2: - vmovd %xmm0, %ebp - vmovd %xmm8, %ebx - vmovd %xmm12, %eax - vextracti128 $1, %ymm0, %xmm4 - vextracti128 $1, %ymm8, %xmm5 - vextracti128 $1, %ymm12, %xmm6 - vmovd %xmm4, %r8d - vmovd %xmm5, %r9d - vmovd %xmm6, %r10d - vpxor 0*256+0*32(%rsp), %ymm0, %ymm0 - vpxor 0*256+1*32(%rsp), %ymm1, %ymm1 - vpxor 0*256+2*32(%rsp), %ymm2, %ymm2 - vpxor 0*256+3*32(%rsp), %ymm3, %ymm3 - vpxor 1*256+0*32(%rsp), %ymm8, %ymm8 - vpxor 1*256+1*32(%rsp), %ymm9, %ymm9 - vpxor 1*256+2*32(%rsp), %ymm10, %ymm10 - vpxor 1*256+3*32(%rsp), %ymm11, %ymm11 - vpxor 2*256+0*32(%rsp), %ymm12, %ymm12 - vpxor 
2*256+1*32(%rsp), %ymm13, %ymm13 - vpxor 2*256+2*32(%rsp), %ymm14, %ymm14 - vpxor 2*256+3*32(%rsp), %ymm15, %ymm15 - andl %r11d, %ebp - leaq 0(%rbp, %rbp, 2), %rbp - shll $8, %ebp - andl %r11d, %ebx - leaq 1(%rbx, %rbx, 2), %rbx - shll $8, %ebx - andl %r11d, %eax - leaq 2(%rax, %rax, 2), %rax - shll $8, %eax - andl %r11d, %r8d - leaq 0(%r8, %r8, 2), %r8 - shll $8, %r8d - andl %r11d, %r9d - leaq 1(%r9, %r9, 2), %r9 - shll $8, %r9d - andl %r11d, %r10d - leaq 2(%r10, %r10, 2), %r10 - shll $8, %r10d - vmovdqa 0*32(%rsi, %rbp), %xmm4 - vinserti128 $1, 0*32+16(%rsi, %r8), %ymm4, %ymm4 - vmovdqa 1*32(%rsi, %rbp), %xmm5 - vinserti128 $1, 1*32+16(%rsi, %r8), %ymm5, %ymm5 - vmovdqa 2*32(%rsi, %rbp), %xmm6 - vinserti128 $1, 2*32+16(%rsi, %r8), %ymm6, %ymm6 - vmovdqa 3*32(%rsi, %rbp), %xmm7 - vinserti128 $1, 3*32+16(%rsi, %r8), %ymm7, %ymm7 - vpxor %ymm4, %ymm0, %ymm0 - vpxor %ymm5, %ymm1, %ymm1 - vpxor %ymm6, %ymm2, %ymm2 - vpxor %ymm7, %ymm3, %ymm3 - vmovdqa 0*32(%rsi, %rbx), %xmm4 - vinserti128 $1, 0*32+16(%rsi, %r9), %ymm4, %ymm4 - vmovdqa 1*32(%rsi, %rbx), %xmm5 - vinserti128 $1, 1*32+16(%rsi, %r9), %ymm5, %ymm5 - vmovdqa 2*32(%rsi, %rbx), %xmm6 - vinserti128 $1, 2*32+16(%rsi, %r9), %ymm6, %ymm6 - vmovdqa 3*32(%rsi, %rbx), %xmm7 - vinserti128 $1, 3*32+16(%rsi, %r9), %ymm7, %ymm7 - vpxor %ymm4, %ymm8, %ymm8 - vpxor %ymm5, %ymm9, %ymm9 - vpxor %ymm6, %ymm10, %ymm10 - vpxor %ymm7, %ymm11, %ymm11 - vmovdqa 0*32(%rsi, %rax), %xmm4 - vinserti128 $1, 0*32+16(%rsi, %r10), %ymm4, %ymm4 - vmovdqa 1*32(%rsi, %rax), %xmm5 - vinserti128 $1, 1*32+16(%rsi, %r10), %ymm5, %ymm5 - vmovdqa 2*32(%rsi, %rax), %xmm6 - vinserti128 $1, 2*32+16(%rsi, %r10), %ymm6, %ymm6 - vmovdqa 3*32(%rsi, %rax), %xmm7 - vinserti128 $1, 3*32+16(%rsi, %r10), %ymm7, %ymm7 - vpxor %ymm4, %ymm12, %ymm12 - vpxor %ymm5, %ymm13, %ymm13 - vpxor %ymm6, %ymm14, %ymm14 - vpxor %ymm7, %ymm15, %ymm15 - - vmovdqa %ymm0, 0*256+0*32(%rsp) - vmovdqa %ymm1, 0*256+1*32(%rsp) - vmovdqa %ymm2, 0*256+2*32(%rsp) - vmovdqa %ymm3, 
0*256+3*32(%rsp) - vmovdqa %ymm8, 1*256+0*32(%rsp) - vmovdqa %ymm9, 1*256+1*32(%rsp) - vmovdqa %ymm10, 1*256+2*32(%rsp) - vmovdqa %ymm11, 1*256+3*32(%rsp) - vmovdqa %ymm12, 2*256+0*32(%rsp) - vmovdqa %ymm13, 2*256+1*32(%rsp) - vmovdqa %ymm14, 2*256+2*32(%rsp) - vmovdqa %ymm15, 2*256+3*32(%rsp) - salsa8_core_6way_avx2 - vpaddd 0*256+0*32(%rsp), %ymm0, %ymm0 - vpaddd 0*256+1*32(%rsp), %ymm1, %ymm1 - vpaddd 0*256+2*32(%rsp), %ymm2, %ymm2 - vpaddd 0*256+3*32(%rsp), %ymm3, %ymm3 - vpaddd 1*256+0*32(%rsp), %ymm8, %ymm8 - vpaddd 1*256+1*32(%rsp), %ymm9, %ymm9 - vpaddd 1*256+2*32(%rsp), %ymm10, %ymm10 - vpaddd 1*256+3*32(%rsp), %ymm11, %ymm11 - vpaddd 2*256+0*32(%rsp), %ymm12, %ymm12 - vpaddd 2*256+1*32(%rsp), %ymm13, %ymm13 - vpaddd 2*256+2*32(%rsp), %ymm14, %ymm14 - vpaddd 2*256+3*32(%rsp), %ymm15, %ymm15 - vmovdqa %ymm0, 0*256+0*32(%rsp) - vmovdqa %ymm1, 0*256+1*32(%rsp) - vmovdqa %ymm2, 0*256+2*32(%rsp) - vmovdqa %ymm3, 0*256+3*32(%rsp) - vmovdqa %ymm8, 1*256+0*32(%rsp) - vmovdqa %ymm9, 1*256+1*32(%rsp) - vmovdqa %ymm10, 1*256+2*32(%rsp) - vmovdqa %ymm11, 1*256+3*32(%rsp) - vmovdqa %ymm12, 2*256+0*32(%rsp) - vmovdqa %ymm13, 2*256+1*32(%rsp) - vmovdqa %ymm14, 2*256+2*32(%rsp) - vmovdqa %ymm15, 2*256+3*32(%rsp) - - vmovdqa 4*32(%rsi, %rbp), %xmm4 - vinserti128 $1, 4*32+16(%rsi, %r8), %ymm4, %ymm4 - vmovdqa 5*32(%rsi, %rbp), %xmm5 - vinserti128 $1, 5*32+16(%rsi, %r8), %ymm5, %ymm5 - vmovdqa 6*32(%rsi, %rbp), %xmm6 - vinserti128 $1, 6*32+16(%rsi, %r8), %ymm6, %ymm6 - vmovdqa 7*32(%rsi, %rbp), %xmm7 - vinserti128 $1, 7*32+16(%rsi, %r8), %ymm7, %ymm7 - vpxor %ymm4, %ymm0, %ymm0 - vpxor %ymm5, %ymm1, %ymm1 - vpxor %ymm6, %ymm2, %ymm2 - vpxor %ymm7, %ymm3, %ymm3 - vmovdqa 4*32(%rsi, %rbx), %xmm4 - vinserti128 $1, 4*32+16(%rsi, %r9), %ymm4, %ymm4 - vmovdqa 5*32(%rsi, %rbx), %xmm5 - vinserti128 $1, 5*32+16(%rsi, %r9), %ymm5, %ymm5 - vmovdqa 6*32(%rsi, %rbx), %xmm6 - vinserti128 $1, 6*32+16(%rsi, %r9), %ymm6, %ymm6 - vmovdqa 7*32(%rsi, %rbx), %xmm7 - vinserti128 $1, 7*32+16(%rsi, 
%r9), %ymm7, %ymm7 - vpxor %ymm4, %ymm8, %ymm8 - vpxor %ymm5, %ymm9, %ymm9 - vpxor %ymm6, %ymm10, %ymm10 - vpxor %ymm7, %ymm11, %ymm11 - vmovdqa 4*32(%rsi, %rax), %xmm4 - vinserti128 $1, 4*32+16(%rsi, %r10), %ymm4, %ymm4 - vmovdqa 5*32(%rsi, %rax), %xmm5 - vinserti128 $1, 5*32+16(%rsi, %r10), %ymm5, %ymm5 - vmovdqa 6*32(%rsi, %rax), %xmm6 - vinserti128 $1, 6*32+16(%rsi, %r10), %ymm6, %ymm6 - vmovdqa 7*32(%rsi, %rax), %xmm7 - vinserti128 $1, 7*32+16(%rsi, %r10), %ymm7, %ymm7 - vpxor %ymm4, %ymm12, %ymm12 - vpxor %ymm5, %ymm13, %ymm13 - vpxor %ymm6, %ymm14, %ymm14 - vpxor %ymm7, %ymm15, %ymm15 - vpxor 0*256+4*32(%rsp), %ymm0, %ymm0 - vpxor 0*256+5*32(%rsp), %ymm1, %ymm1 - vpxor 0*256+6*32(%rsp), %ymm2, %ymm2 - vpxor 0*256+7*32(%rsp), %ymm3, %ymm3 - vpxor 1*256+4*32(%rsp), %ymm8, %ymm8 - vpxor 1*256+5*32(%rsp), %ymm9, %ymm9 - vpxor 1*256+6*32(%rsp), %ymm10, %ymm10 - vpxor 1*256+7*32(%rsp), %ymm11, %ymm11 - vpxor 2*256+4*32(%rsp), %ymm12, %ymm12 - vpxor 2*256+5*32(%rsp), %ymm13, %ymm13 - vpxor 2*256+6*32(%rsp), %ymm14, %ymm14 - vpxor 2*256+7*32(%rsp), %ymm15, %ymm15 - vmovdqa %ymm0, 0*256+4*32(%rsp) - vmovdqa %ymm1, 0*256+5*32(%rsp) - vmovdqa %ymm2, 0*256+6*32(%rsp) - vmovdqa %ymm3, 0*256+7*32(%rsp) - vmovdqa %ymm8, 1*256+4*32(%rsp) - vmovdqa %ymm9, 1*256+5*32(%rsp) - vmovdqa %ymm10, 1*256+6*32(%rsp) - vmovdqa %ymm11, 1*256+7*32(%rsp) - vmovdqa %ymm12, 2*256+4*32(%rsp) - vmovdqa %ymm13, 2*256+5*32(%rsp) - vmovdqa %ymm14, 2*256+6*32(%rsp) - vmovdqa %ymm15, 2*256+7*32(%rsp) - salsa8_core_6way_avx2 - vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0 - vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1 - vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2 - vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3 - vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8 - vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9 - vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10 - vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11 - vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12 - vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13 - vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14 - vpaddd 2*256+7*32(%rsp), 
%ymm15, %ymm15 - vmovdqa %ymm0, 0*256+4*32(%rsp) - vmovdqa %ymm1, 0*256+5*32(%rsp) - vmovdqa %ymm2, 0*256+6*32(%rsp) - vmovdqa %ymm3, 0*256+7*32(%rsp) - vmovdqa %ymm8, 1*256+4*32(%rsp) - vmovdqa %ymm9, 1*256+5*32(%rsp) - vmovdqa %ymm10, 1*256+6*32(%rsp) - vmovdqa %ymm11, 1*256+7*32(%rsp) - vmovdqa %ymm12, 2*256+4*32(%rsp) - vmovdqa %ymm13, 2*256+5*32(%rsp) - vmovdqa %ymm14, 2*256+6*32(%rsp) - vmovdqa %ymm15, 2*256+7*32(%rsp) - - subq $1, %rcx - ja scrypt_core_6way_avx2_loop2 - - scrypt_shuffle_unpack2 %rsp, 0*128, %rdi, 0*256+0 - scrypt_shuffle_unpack2 %rsp, 1*128, %rdi, 0*256+64 - scrypt_shuffle_unpack2 %rsp, 2*128, %rdi, 1*256+0 - scrypt_shuffle_unpack2 %rsp, 3*128, %rdi, 1*256+64 - scrypt_shuffle_unpack2 %rsp, 4*128, %rdi, 2*256+0 - scrypt_shuffle_unpack2 %rsp, 5*128, %rdi, 2*256+64 - - scrypt_core_6way_cleanup - ret - -#endif /* USE_AVX2 */ - -#endif diff --git a/asm/scrypt-x86.S b/asm/scrypt-x86.S deleted file mode 100644 index 9cff123d..00000000 --- a/asm/scrypt-x86.S +++ /dev/null @@ -1,830 +0,0 @@ -/* - * Copyright 2011-2012, 2014 pooler@litecoinpool.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(USE_ASM) && defined(__i386__) - -.macro scrypt_shuffle src, so, dest, do - movl \so+60(\src), %eax - movl \so+44(\src), %ebx - movl \so+28(\src), %ecx - movl \so+12(\src), %edx - movl %eax, \do+12(\dest) - movl %ebx, \do+28(\dest) - movl %ecx, \do+44(\dest) - movl %edx, \do+60(\dest) - movl \so+40(\src), %eax - movl \so+8(\src), %ebx - movl \so+48(\src), %ecx - movl \so+16(\src), %edx - movl %eax, \do+8(\dest) - movl %ebx, \do+40(\dest) - movl %ecx, \do+16(\dest) - movl %edx, \do+48(\dest) - movl \so+20(\src), %eax - movl \so+4(\src), %ebx - movl \so+52(\src), %ecx - movl \so+36(\src), %edx - movl %eax, \do+4(\dest) - movl %ebx, \do+20(\dest) - movl %ecx, \do+36(\dest) - movl %edx, \do+52(\dest) - movl \so+0(\src), %eax - movl \so+24(\src), %ebx - movl \so+32(\src), %ecx - movl \so+56(\src), %edx - movl %eax, \do+0(\dest) - movl %ebx, \do+24(\dest) - movl %ecx, \do+32(\dest) - movl %edx, \do+56(\dest) -.endm - -.macro salsa8_core_gen_quadround - movl 52(%esp), %ecx - movl 4(%esp), %edx - movl 20(%esp), %ebx - movl 8(%esp), %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 4(%esp) - movl 36(%esp), %edi - leal (%edx, %ebx), %ebp - roll $9, %ebp - xorl %ebp, %edi - movl 24(%esp), %ebp - movl %edi, 8(%esp) - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 40(%esp), %ebx - movl %ecx, 
20(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 24(%esp) - movl 56(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 36(%esp) - movl 28(%esp), %ecx - movl %edx, 28(%esp) - movl 44(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 60(%esp), %ebx - movl %esi, 40(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 44(%esp) - movl 12(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 12(%esp) - movl 48(%esp), %esi - movl %ebp, 48(%esp) - movl 64(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl 32(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 32(%esp) - movl %ebx, %ecx - movl %edx, 52(%esp) - movl 28(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 40(%esp), %ebx - movl %esi, 28(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 40(%esp) - movl 12(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 12(%esp) - movl 4(%esp), %esi - movl %ebp, 4(%esp) - movl 48(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 48(%esp) - movl 32(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 32(%esp) - movl 24(%esp), %ecx - movl %edx, 24(%esp) - movl 52(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 28(%esp), %ebx - movl %esi, 28(%esp) - addl 
%edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 52(%esp) - movl 8(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 8(%esp) - movl 44(%esp), %esi - movl %ebp, 44(%esp) - movl 4(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 20(%esp), %ebx - movl %ecx, 4(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl 36(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 20(%esp) - movl %ebx, %ecx - movl %edx, 36(%esp) - movl 24(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 28(%esp), %ebx - movl %esi, 24(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 28(%esp) - xorl %esi, %ebp - movl 8(%esp), %esi - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl 40(%esp), %edi - movl %ebp, 8(%esp) - movl 44(%esp), %ebp - movl %esi, 40(%esp) - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 4(%esp), %ebx - movl %ecx, 44(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 4(%esp) - movl 20(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 56(%esp) - movl 48(%esp), %ecx - movl %edx, 20(%esp) - movl 36(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 24(%esp), %ebx - movl %edi, 24(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 60(%esp) - movl 12(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 12(%esp) - movl 52(%esp), %edi - movl %ebp, 36(%esp) - movl 8(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %esi, %ecx - roll $18, 
%ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl 32(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 32(%esp) - movl %ebx, %ecx - movl %edx, 48(%esp) - movl 20(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 24(%esp), %ebx - movl %edi, 20(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 8(%esp) - movl 12(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 12(%esp) - movl 28(%esp), %edi - movl %ebp, 52(%esp) - movl 36(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 28(%esp) - movl 32(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 32(%esp) - movl 4(%esp), %ecx - movl %edx, 4(%esp) - movl 48(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 20(%esp), %ebx - movl %edi, 20(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 48(%esp) - movl 40(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 36(%esp) - movl 60(%esp), %edi - movl %ebp, 24(%esp) - movl 52(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 44(%esp), %ebx - movl %ecx, 40(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 52(%esp) - movl 56(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 56(%esp) - addl %esi, %ebx - movl %edx, 44(%esp) - roll $13, %ebx - xorl %ebx, %edi - movl %edi, 60(%esp) - addl %esi, %edi - roll $18, %edi - xorl %edi, %ebp - movl %ebp, 64(%esp) -.endm - - .text - .p2align 5 
-salsa8_core_gen: - salsa8_core_gen_quadround - salsa8_core_gen_quadround - ret - - - .text - .p2align 5 - .globl scrypt_core - .globl _scrypt_core -scrypt_core: -_scrypt_core: - pushl %ebx - pushl %ebp - pushl %edi - pushl %esi - - /* Check for SSE2 availability */ - movl $1, %eax - cpuid - andl $0x04000000, %edx - jnz scrypt_core_sse2 - -scrypt_core_gen: - movl 20(%esp), %edi - movl 24(%esp), %esi - movl 28(%esp), %ecx - subl $72, %esp - -.macro scrypt_core_macro1a p, q - movl \p(%edi), %eax - movl \q(%edi), %edx - movl %eax, \p(%esi) - movl %edx, \q(%esi) - xorl %edx, %eax - movl %eax, \p(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro1b p, q - movl \p(%edi), %eax - xorl \p(%esi, %edx), %eax - movl \q(%edi), %ebx - xorl \q(%esi, %edx), %ebx - movl %ebx, \q(%edi) - xorl %ebx, %eax - movl %eax, \p(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro2 p, q - movl \p(%esp), %eax - addl \p(%edi), %eax - movl %eax, \p(%edi) - xorl \q(%edi), %eax - movl %eax, \q(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro3 p, q - movl \p(%esp), %eax - addl \q(%edi), %eax - movl %eax, \q(%edi) -.endm - - shll $7, %ecx - addl %esi, %ecx -scrypt_core_gen_loop1: - movl %esi, 64(%esp) - movl %ecx, 68(%esp) - - scrypt_core_macro1a 0, 64 - scrypt_core_macro1a 4, 68 - scrypt_core_macro1a 8, 72 - scrypt_core_macro1a 12, 76 - scrypt_core_macro1a 16, 80 - scrypt_core_macro1a 20, 84 - scrypt_core_macro1a 24, 88 - scrypt_core_macro1a 28, 92 - scrypt_core_macro1a 32, 96 - scrypt_core_macro1a 36, 100 - scrypt_core_macro1a 40, 104 - scrypt_core_macro1a 44, 108 - scrypt_core_macro1a 48, 112 - scrypt_core_macro1a 52, 116 - scrypt_core_macro1a 56, 120 - scrypt_core_macro1a 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro2 0, 64 - scrypt_core_macro2 4, 68 - scrypt_core_macro2 8, 72 - scrypt_core_macro2 12, 76 - scrypt_core_macro2 16, 80 - scrypt_core_macro2 20, 84 - scrypt_core_macro2 24, 88 - scrypt_core_macro2 28, 92 - 
scrypt_core_macro2 32, 96 - scrypt_core_macro2 36, 100 - scrypt_core_macro2 40, 104 - scrypt_core_macro2 44, 108 - scrypt_core_macro2 48, 112 - scrypt_core_macro2 52, 116 - scrypt_core_macro2 56, 120 - scrypt_core_macro2 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro3 0, 64 - scrypt_core_macro3 4, 68 - scrypt_core_macro3 8, 72 - scrypt_core_macro3 12, 76 - scrypt_core_macro3 16, 80 - scrypt_core_macro3 20, 84 - scrypt_core_macro3 24, 88 - scrypt_core_macro3 28, 92 - scrypt_core_macro3 32, 96 - scrypt_core_macro3 36, 100 - scrypt_core_macro3 40, 104 - scrypt_core_macro3 44, 108 - scrypt_core_macro3 48, 112 - scrypt_core_macro3 52, 116 - scrypt_core_macro3 56, 120 - scrypt_core_macro3 60, 124 - - movl 64(%esp), %esi - movl 68(%esp), %ecx - addl $128, %esi - cmpl %ecx, %esi - jne scrypt_core_gen_loop1 - - movl 96(%esp), %esi - movl 100(%esp), %ecx - movl %ecx, %eax - subl $1, %eax - movl %eax, 100(%esp) -scrypt_core_gen_loop2: - movl %ecx, 68(%esp) - - movl 64(%edi), %edx - andl 100(%esp), %edx - shll $7, %edx - - scrypt_core_macro1b 0, 64 - scrypt_core_macro1b 4, 68 - scrypt_core_macro1b 8, 72 - scrypt_core_macro1b 12, 76 - scrypt_core_macro1b 16, 80 - scrypt_core_macro1b 20, 84 - scrypt_core_macro1b 24, 88 - scrypt_core_macro1b 28, 92 - scrypt_core_macro1b 32, 96 - scrypt_core_macro1b 36, 100 - scrypt_core_macro1b 40, 104 - scrypt_core_macro1b 44, 108 - scrypt_core_macro1b 48, 112 - scrypt_core_macro1b 52, 116 - scrypt_core_macro1b 56, 120 - scrypt_core_macro1b 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro2 0, 64 - scrypt_core_macro2 4, 68 - scrypt_core_macro2 8, 72 - scrypt_core_macro2 12, 76 - scrypt_core_macro2 16, 80 - scrypt_core_macro2 20, 84 - scrypt_core_macro2 24, 88 - scrypt_core_macro2 28, 92 - scrypt_core_macro2 32, 96 - scrypt_core_macro2 36, 100 - scrypt_core_macro2 40, 104 - scrypt_core_macro2 44, 108 - scrypt_core_macro2 48, 112 - scrypt_core_macro2 52, 116 - scrypt_core_macro2 56, 120 - 
scrypt_core_macro2 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - movl 96(%esp), %esi - scrypt_core_macro3 0, 64 - scrypt_core_macro3 4, 68 - scrypt_core_macro3 8, 72 - scrypt_core_macro3 12, 76 - scrypt_core_macro3 16, 80 - scrypt_core_macro3 20, 84 - scrypt_core_macro3 24, 88 - scrypt_core_macro3 28, 92 - scrypt_core_macro3 32, 96 - scrypt_core_macro3 36, 100 - scrypt_core_macro3 40, 104 - scrypt_core_macro3 44, 108 - scrypt_core_macro3 48, 112 - scrypt_core_macro3 52, 116 - scrypt_core_macro3 56, 120 - scrypt_core_macro3 60, 124 - - movl 68(%esp), %ecx - subl $1, %ecx - ja scrypt_core_gen_loop2 - - addl $72, %esp - popl %esi - popl %edi - popl %ebp - popl %ebx - ret - - -.macro salsa8_core_sse2_doubleround - movdqa %xmm1, %xmm4 - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm3 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm3, %xmm3 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm1 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm0 - pshufd $0x39, %xmm1, %xmm1 - - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm1 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm1, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm1, %xmm1 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm3 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor 
%xmm4, %xmm0 - pshufd $0x39, %xmm3, %xmm3 - pxor %xmm5, %xmm0 -.endm - -.macro salsa8_core_sse2 - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround -.endm - - .p2align 5 -scrypt_core_sse2: - movl 20(%esp), %edi - movl 24(%esp), %esi - movl %esp, %ebp - subl $128, %esp - andl $-16, %esp - - scrypt_shuffle %edi, 0, %esp, 0 - scrypt_shuffle %edi, 64, %esp, 64 - - movdqa 96(%esp), %xmm6 - movdqa 112(%esp), %xmm7 - - movl %esi, %edx - movl 28(%ebp), %ecx - shll $7, %ecx - addl %esi, %ecx -scrypt_core_sse2_loop1: - movdqa 0(%esp), %xmm0 - movdqa 16(%esp), %xmm1 - movdqa 32(%esp), %xmm2 - movdqa 48(%esp), %xmm3 - movdqa 64(%esp), %xmm4 - movdqa 80(%esp), %xmm5 - pxor %xmm4, %xmm0 - pxor %xmm5, %xmm1 - movdqa %xmm0, 0(%edx) - movdqa %xmm1, 16(%edx) - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm2, 32(%edx) - movdqa %xmm3, 48(%edx) - movdqa %xmm4, 64(%edx) - movdqa %xmm5, 80(%edx) - movdqa %xmm6, 96(%edx) - movdqa %xmm7, 112(%edx) - - salsa8_core_sse2 - paddd 0(%edx), %xmm0 - paddd 16(%edx), %xmm1 - paddd 32(%edx), %xmm2 - paddd 48(%edx), %xmm3 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - - pxor 64(%esp), %xmm0 - pxor 80(%esp), %xmm1 - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - movdqa %xmm2, %xmm6 - movdqa %xmm3, %xmm7 - salsa8_core_sse2 - paddd 64(%esp), %xmm0 - paddd 80(%esp), %xmm1 - paddd %xmm2, %xmm6 - paddd %xmm3, %xmm7 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - - addl $128, %edx - cmpl %ecx, %edx - jne scrypt_core_sse2_loop1 - - movdqa 64(%esp), %xmm4 - movdqa 80(%esp), %xmm5 - - movl 28(%ebp), %ecx - movl %ecx, %eax - subl $1, %eax -scrypt_core_sse2_loop2: - movd %xmm4, %edx - movdqa 0(%esp), %xmm0 - movdqa 16(%esp), %xmm1 - movdqa 32(%esp), %xmm2 - movdqa 48(%esp), %xmm3 - andl %eax, %edx - shll $7, %edx - pxor 0(%esi, %edx), %xmm0 - pxor 16(%esi, %edx), %xmm1 - pxor 32(%esi, 
%edx), %xmm2 - pxor 48(%esi, %edx), %xmm3 - - pxor %xmm4, %xmm0 - pxor %xmm5, %xmm1 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - salsa8_core_sse2 - paddd 0(%esp), %xmm0 - paddd 16(%esp), %xmm1 - paddd 32(%esp), %xmm2 - paddd 48(%esp), %xmm3 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - - pxor 64(%esi, %edx), %xmm0 - pxor 80(%esi, %edx), %xmm1 - pxor 96(%esi, %edx), %xmm2 - pxor 112(%esi, %edx), %xmm3 - pxor 64(%esp), %xmm0 - pxor 80(%esp), %xmm1 - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - movdqa %xmm2, %xmm6 - movdqa %xmm3, %xmm7 - salsa8_core_sse2 - paddd 64(%esp), %xmm0 - paddd 80(%esp), %xmm1 - paddd %xmm2, %xmm6 - paddd %xmm3, %xmm7 - movdqa %xmm0, %xmm4 - movdqa %xmm1, %xmm5 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - - subl $1, %ecx - ja scrypt_core_sse2_loop2 - - movdqa %xmm6, 96(%esp) - movdqa %xmm7, 112(%esp) - - scrypt_shuffle %esp, 0, %edi, 0 - scrypt_shuffle %esp, 64, %edi, 64 - - movl %ebp, %esp - popl %esi - popl %edi - popl %ebp - popl %ebx - ret - -#endif diff --git a/asm/sha2-arm.S b/asm/sha2-arm.S deleted file mode 100644 index 6ba9fa97..00000000 --- a/asm/sha2-arm.S +++ /dev/null @@ -1,1583 +0,0 @@ -/* - * Copyright 2012 pooler@litecoinpool.org - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#include - -#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__) - -.macro sha256_k - .align 2 - .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -.endm - -.macro sha256_extend_doubleround_core i, rw, ra, rb, ry, rz - mov r12, \ry, ror #17 - add r11, r11, \ra - eor r12, r12, \ry, ror #19 - mov \ra, lr, ror #7 - eor r12, r12, \ry, lsr #10 - eor \ra, \ra, lr, ror #18 - add r12, r12, r11 - ldr r11, [\rw, #(\i+2)*4] - eor \ra, \ra, lr, lsr #3 - add \ra, \ra, r12 - - mov r12, \rz, ror #17 - str \ra, [\rw, #(\i+16)*4] - add lr, lr, \rb - eor r12, r12, \rz, ror #19 - mov \rb, r11, ror #7 - eor r12, r12, \rz, lsr #10 - eor \rb, \rb, r11, ror #18 - add lr, lr, r12 - eor \rb, \rb, r11, lsr #3 - add \rb, \rb, lr -.endm - -.macro sha256_extend_doubleround_head i, rw, ra, rb, ry, rz - ldr lr, [\rw, #(\i+1)*4] - sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz - ldr lr, [\rw, #(\i+3)*4] -.endm - -.macro sha256_extend_doubleround_body i, rw, ra, rb, ry, rz - str \rz, [\rw, #(\i+15)*4] - sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz - ldr lr, [\rw, #(\i+3)*4] -.endm - -.macro sha256_extend_doubleround_foot i, rw, ra, rb, ry, 
rz - str \rz, [\rw, #(\i+15)*4] - sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz - str \rb, [\rw, #(\i+17)*4] -.endm - -.macro sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh - ldr r12, [\rw, #(\i)*4] - and r3, \rf, \re - bic lr, \rg, \re - orr lr, lr, r3 - ldr r3, \ka + (\i)*4 - add \rh, \rh, lr - eor lr, \re, \re, ror #5 - add \rh, \rh, r12 - eor lr, lr, \re, ror #19 - add \rh, \rh, r3 - eor r3, \ra, \rb - add \rh, \rh, lr, ror #6 - - and r3, r3, \rc - eor r12, \ra, \ra, ror #11 - and lr, \ra, \rb - eor r12, r12, \ra, ror #20 - eor lr, lr, r3 - add r3, \rh, lr - add \rh, \rh, \rd - add \rd, r3, r12, ror #2 -.endm - -.macro sha256_main_quadround i, ka, rw - sha256_main_round \i+0, \ka, \rw, r4, r5, r6, r7, r8, r9, r10, r11 - sha256_main_round \i+1, \ka, \rw, r7, r4, r5, r6, r11, r8, r9, r10 - sha256_main_round \i+2, \ka, \rw, r6, r7, r4, r5, r10, r11, r8, r9 - sha256_main_round \i+3, \ka, \rw, r5, r6, r7, r4, r9, r10, r11, r8 -.endm - - - .text - .code 32 - .align 2 - .globl sha256_transform - .globl _sha256_transform -#ifdef __ELF__ - .type sha256_transform, %function -#endif -sha256_transform: -_sha256_transform: - stmfd sp!, {r4-r11, lr} - cmp r2, #0 - sub sp, sp, #64*4 - bne sha256_transform_swap - - ldmia r1!, {r4-r11} - stmia sp, {r4-r11} - add r3, sp, #8*4 - ldmia r1, {r4-r11} - stmia r3, {r4-r11} - b sha256_transform_extend - -.macro bswap rd, rn - eor r12, \rn, \rn, ror #16 - bic r12, r12, #0x00ff0000 - mov \rd, \rn, ror #8 - eor \rd, \rd, r12, lsr #8 -.endm - -sha256_transform_swap: - ldmia r1!, {r4-r11} - bswap r4, r4 - bswap r5, r5 - bswap r6, r6 - bswap r7, r7 - bswap r8, r8 - bswap r9, r9 - bswap r10, r10 - bswap r11, r11 - stmia sp, {r4-r11} - add r3, sp, #8*4 - ldmia r1, {r4-r11} - bswap r4, r4 - bswap r5, r5 - bswap r6, r6 - bswap r7, r7 - bswap r8, r8 - bswap r9, r9 - bswap r10, r10 - bswap r11, r11 - stmia r3, {r4-r11} - -sha256_transform_extend: - add r12, sp, #9*4 - ldr r11, [sp, #0*4] - ldmia r12, {r4-r10} - 
sha256_extend_doubleround_head 0, sp, r4, r5, r9, r10 - sha256_extend_doubleround_body 2, sp, r6, r7, r4, r5 - sha256_extend_doubleround_body 4, sp, r8, r9, r6, r7 - sha256_extend_doubleround_body 6, sp, r10, r4, r8, r9 - sha256_extend_doubleround_body 8, sp, r5, r6, r10, r4 - sha256_extend_doubleround_body 10, sp, r7, r8, r5, r6 - sha256_extend_doubleround_body 12, sp, r9, r10, r7, r8 - sha256_extend_doubleround_body 14, sp, r4, r5, r9, r10 - sha256_extend_doubleround_body 16, sp, r6, r7, r4, r5 - sha256_extend_doubleround_body 18, sp, r8, r9, r6, r7 - sha256_extend_doubleround_body 20, sp, r10, r4, r8, r9 - sha256_extend_doubleround_body 22, sp, r5, r6, r10, r4 - sha256_extend_doubleround_body 24, sp, r7, r8, r5, r6 - sha256_extend_doubleround_body 26, sp, r9, r10, r7, r8 - sha256_extend_doubleround_body 28, sp, r4, r5, r9, r10 - sha256_extend_doubleround_body 30, sp, r6, r7, r4, r5 - sha256_extend_doubleround_body 32, sp, r8, r9, r6, r7 - sha256_extend_doubleround_body 34, sp, r10, r4, r8, r9 - sha256_extend_doubleround_body 36, sp, r5, r6, r10, r4 - sha256_extend_doubleround_body 38, sp, r7, r8, r5, r6 - sha256_extend_doubleround_body 40, sp, r9, r10, r7, r8 - sha256_extend_doubleround_body 42, sp, r4, r5, r9, r10 - sha256_extend_doubleround_body 44, sp, r6, r7, r4, r5 - sha256_extend_doubleround_foot 46, sp, r8, r9, r6, r7 - - ldmia r0, {r4-r11} - sha256_main_quadround 0, sha256_transform_k, sp - sha256_main_quadround 4, sha256_transform_k, sp - sha256_main_quadround 8, sha256_transform_k, sp - sha256_main_quadround 12, sha256_transform_k, sp - sha256_main_quadround 16, sha256_transform_k, sp - sha256_main_quadround 20, sha256_transform_k, sp - sha256_main_quadround 24, sha256_transform_k, sp - sha256_main_quadround 28, sha256_transform_k, sp - b sha256_transform_k_over -sha256_transform_k: - sha256_k -sha256_transform_k_over: - sha256_main_quadround 32, sha256_transform_k, sp - sha256_main_quadround 36, sha256_transform_k, sp - sha256_main_quadround 40, 
sha256_transform_k, sp - sha256_main_quadround 44, sha256_transform_k, sp - sha256_main_quadround 48, sha256_transform_k, sp - sha256_main_quadround 52, sha256_transform_k, sp - sha256_main_quadround 56, sha256_transform_k, sp - sha256_main_quadround 60, sha256_transform_k, sp - - ldmia r0, {r1, r2, r3, r12} - add r4, r4, r1 - add r5, r5, r2 - add r6, r6, r3 - add r7, r7, r12 - stmia r0!, {r4-r7} - ldmia r0, {r1, r2, r3, r12} - add r8, r8, r1 - add r9, r9, r2 - add r10, r10, r3 - add r11, r11, r12 - stmia r0, {r8-r11} - - add sp, sp, #64*4 -#ifdef __thumb__ - ldmfd sp!, {r4-r11, lr} - bx lr -#else - ldmfd sp!, {r4-r11, pc} -#endif - - - .text - .code 32 - .align 2 - .globl sha256d_ms - .globl _sha256d_ms -#ifdef __ELF__ - .type sha256d_ms, %function -#endif -sha256d_ms: -_sha256d_ms: - stmfd sp!, {r4-r11, lr} - sub sp, sp, #64*4 - - cmp r0, r0 - - ldr lr, [r1, #3*4] - ldr r6, [r1, #18*4] - ldr r7, [r1, #19*4] - - mov r12, lr, ror #7 - str r6, [sp, #18*4] - eor r12, r12, lr, ror #18 - str r7, [sp, #19*4] - eor r12, r12, lr, lsr #3 - ldr r8, [r1, #20*4] - add r6, r6, r12 - ldr r10, [r1, #22*4] - add r7, r7, lr - str r6, [r1, #18*4] - - mov r12, r6, ror #17 - str r7, [r1, #19*4] - eor r12, r12, r6, ror #19 - str r8, [sp, #20*4] - eor r12, r12, r6, lsr #10 - ldr r4, [r1, #23*4] - add r8, r8, r12 - ldr r5, [r1, #24*4] - - mov r9, r7, ror #17 - str r8, [r1, #20*4] - eor r9, r9, r7, ror #19 - str r10, [sp, #21*4] - eor r9, r9, r7, lsr #10 - str r4, [sp, #22*4] - - mov r12, r8, ror #17 - str r9, [r1, #21*4] - eor r12, r12, r8, ror #19 - str r5, [sp, #23*4] - eor r12, r12, r8, lsr #10 - mov lr, r9, ror #17 - add r10, r10, r12 - ldr r11, [r1, #30*4] - - eor lr, lr, r9, ror #19 - str r10, [r1, #22*4] - eor lr, lr, r9, lsr #10 - str r11, [sp, #24*4] - add r4, r4, lr - - mov r12, r10, ror #17 - str r4, [r1, #23*4] - eor r12, r12, r10, ror #19 - mov lr, r4, ror #17 - eor r12, r12, r10, lsr #10 - eor lr, lr, r4, ror #19 - add r5, r5, r12 - eor lr, lr, r4, lsr #10 - str r5, [r1, 
#24*4] - add r6, r6, lr - - mov r12, r5, ror #17 - str r6, [r1, #25*4] - eor r12, r12, r5, ror #19 - mov lr, r6, ror #17 - eor r12, r12, r5, lsr #10 - eor lr, lr, r6, ror #19 - add r7, r7, r12 - eor lr, lr, r6, lsr #10 - str r7, [r1, #26*4] - add r8, r8, lr - - mov r12, r7, ror #17 - str r8, [r1, #27*4] - eor r12, r12, r7, ror #19 - mov lr, r8, ror #17 - eor r12, r12, r7, lsr #10 - eor lr, lr, r8, ror #19 - add r9, r9, r12 - eor lr, lr, r8, lsr #10 - str r9, [r1, #28*4] - add r10, r10, lr - - ldr lr, [r1, #31*4] - mov r12, r9, ror #17 - str r10, [r1, #29*4] - eor r12, r12, r9, ror #19 - str lr, [sp, #25*4] - eor r12, r12, r9, lsr #10 - add r11, r11, r12 - add r5, r5, lr - mov r12, r10, ror #17 - add r4, r4, r11 - - ldr r11, [r1, #16*4] - eor r12, r12, r10, ror #19 - str r4, [r1, #30*4] - eor r12, r12, r10, lsr #10 - add r5, r5, r12 - ldr lr, [r1, #17*4] - -sha256d_ms_extend_loop2: - sha256_extend_doubleround_body 16, r1, r6, r7, r4, r5 - sha256_extend_doubleround_body 18, r1, r8, r9, r6, r7 - sha256_extend_doubleround_body 20, r1, r10, r4, r8, r9 - sha256_extend_doubleround_body 22, r1, r5, r6, r10, r4 - sha256_extend_doubleround_body 24, r1, r7, r8, r5, r6 - sha256_extend_doubleround_body 26, r1, r9, r10, r7, r8 - sha256_extend_doubleround_body 28, r1, r4, r5, r9, r10 - sha256_extend_doubleround_body 30, r1, r6, r7, r4, r5 - sha256_extend_doubleround_body 32, r1, r8, r9, r6, r7 - sha256_extend_doubleround_body 34, r1, r10, r4, r8, r9 - sha256_extend_doubleround_body 36, r1, r5, r6, r10, r4 - sha256_extend_doubleround_body 38, r1, r7, r8, r5, r6 - sha256_extend_doubleround_body 40, r1, r9, r10, r7, r8 - sha256_extend_doubleround_body 42, r1, r4, r5, r9, r10 - bne sha256d_ms_extend_coda2 - sha256_extend_doubleround_body 44, r1, r6, r7, r4, r5 - sha256_extend_doubleround_foot 46, r1, r8, r9, r6, r7 - - ldr r4, [r3, #0*4] - ldr r9, [r3, #1*4] - ldr r10, [r3, #2*4] - ldr r11, [r3, #3*4] - ldr r8, [r3, #4*4] - ldr r5, [r3, #5*4] - ldr r6, [r3, #6*4] - ldr r7, [r3, #7*4] 
- b sha256d_ms_main_loop1 - -sha256d_ms_main_loop2: - sha256_main_round 0, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11 - sha256_main_round 1, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10 - sha256_main_round 2, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9 -sha256d_ms_main_loop1: - sha256_main_round 3, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8 - sha256_main_quadround 4, sha256d_ms_k, r1 - sha256_main_quadround 8, sha256d_ms_k, r1 - sha256_main_quadround 12, sha256d_ms_k, r1 - sha256_main_quadround 16, sha256d_ms_k, r1 - sha256_main_quadround 20, sha256d_ms_k, r1 - sha256_main_quadround 24, sha256d_ms_k, r1 - sha256_main_quadround 28, sha256d_ms_k, r1 - b sha256d_ms_k_over -sha256d_ms_k: - sha256_k -sha256d_ms_k_over: - sha256_main_quadround 32, sha256d_ms_k, r1 - sha256_main_quadround 36, sha256d_ms_k, r1 - sha256_main_quadround 40, sha256d_ms_k, r1 - sha256_main_quadround 44, sha256d_ms_k, r1 - sha256_main_quadround 48, sha256d_ms_k, r1 - sha256_main_quadround 52, sha256d_ms_k, r1 - sha256_main_round 56, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11 - bne sha256d_ms_finish - sha256_main_round 57, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10 - sha256_main_round 58, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9 - sha256_main_round 59, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8 - sha256_main_quadround 60, sha256d_ms_k, r1 - - ldmia r2!, {r3, r12, lr} - add r4, r4, r3 - add r5, r5, r12 - add r6, r6, lr - stmia sp, {r4-r6} - ldmia r2, {r3, r4, r5, r6, r12} - add lr, sp, #3*4 - add r7, r7, r3 - add r8, r8, r4 - add r9, r9, r5 - add r10, r10, r6 - add r11, r11, r12 - add r12, sp, #18*4 - stmia lr!, {r7-r11} - - ldmia r12, {r4-r11} - str r4, [r1, #18*4] - str r5, [r1, #19*4] - str r6, [r1, #20*4] - str r7, [r1, #22*4] - str r8, [r1, #23*4] - str r9, [r1, #24*4] - str r10, [r1, #30*4] - str r11, [r1, #31*4] - - mov r3, #0x80000000 - mov r4, #0 - mov r5, #0 - mov r6, #0 - mov r7, #0 - mov r8, #0 - mov r9, #0 - mov r10, 
#0x00000100 - stmia lr, {r3-r10} - - ldr lr, [sp, #1*4] - movs r1, sp - ldr r4, [sp, #0*4] - - ldr r11, [sp, #2*4] - mov r12, lr, ror #7 - eor r12, r12, lr, ror #18 - add r5, lr, #0x00a00000 - eor r12, r12, lr, lsr #3 - mov lr, r11, ror #7 - add r4, r4, r12 - eor lr, lr, r11, ror #18 - str r4, [sp, #16*4] - eor lr, lr, r11, lsr #3 - mov r12, r4, ror #17 - add r5, r5, lr - ldr lr, [sp, #3*4] - - str r5, [sp, #17*4] - eor r12, r12, r4, ror #19 - mov r6, lr, ror #7 - eor r12, r12, r4, lsr #10 - eor r6, r6, lr, ror #18 - add r11, r11, r12 - eor r6, r6, lr, lsr #3 - mov r12, r5, ror #17 - add r6, r6, r11 - ldr r11, [sp, #4*4] - - str r6, [sp, #18*4] - eor r12, r12, r5, ror #19 - mov r7, r11, ror #7 - eor r12, r12, r5, lsr #10 - eor r7, r7, r11, ror #18 - add lr, lr, r12 - eor r7, r7, r11, lsr #3 - mov r12, r6, ror #17 - add r7, r7, lr - ldr lr, [sp, #5*4] - - str r7, [sp, #19*4] - eor r12, r12, r6, ror #19 - mov r8, lr, ror #7 - eor r12, r12, r6, lsr #10 - eor r8, r8, lr, ror #18 - add r11, r11, r12 - eor r8, r8, lr, lsr #3 - mov r12, r7, ror #17 - add r8, r8, r11 - ldr r11, [sp, #6*4] - - str r8, [sp, #20*4] - eor r12, r12, r7, ror #19 - mov r9, r11, ror #7 - eor r12, r12, r7, lsr #10 - eor r9, r9, r11, ror #18 - add lr, lr, r12 - eor r9, r9, r11, lsr #3 - mov r12, r8, ror #17 - add r9, r9, lr - ldr lr, [sp, #7*4] - - str r9, [sp, #21*4] - eor r12, r12, r8, ror #19 - mov r10, lr, ror #7 - eor r12, r12, r8, lsr #10 - eor r10, r10, lr, ror #18 - add r11, r11, r12 - eor r10, r10, lr, lsr #3 - mov r12, r9, ror #17 - add r11, r11, #0x00000100 - add lr, lr, r4 - add r10, r10, r11 - - eor r12, r12, r9, ror #19 - str r10, [sp, #22*4] - add lr, lr, #0x11000000 - eor r12, r12, r9, lsr #10 - add lr, lr, r12 - mov r12, r10, ror #17 - add r4, lr, #0x00002000 - eor r12, r12, r10, ror #19 - str r4, [sp, #23*4] - add r5, r5, #0x80000000 - eor r12, r12, r10, lsr #10 - add r5, r5, r12 - - mov r12, r4, ror #17 - str r5, [sp, #24*4] - eor r12, r12, r4, ror #19 - mov r11, r5, ror #17 - eor 
r12, r12, r4, lsr #10 - eor r11, r11, r5, ror #19 - add r6, r6, r12 - eor r11, r11, r5, lsr #10 - str r6, [sp, #25*4] - add r7, r7, r11 - - mov r12, r6, ror #17 - str r7, [sp, #26*4] - eor r12, r12, r6, ror #19 - mov r11, r7, ror #17 - eor r12, r12, r6, lsr #10 - eor r11, r11, r7, ror #19 - add r8, r8, r12 - eor r11, r11, r7, lsr #10 - str r8, [sp, #27*4] - add r9, r9, r11 - - mov lr, r8, ror #17 - mov r12, r9, ror #17 - str r9, [sp, #28*4] - add r4, r4, #0x00400000 - eor lr, lr, r8, ror #19 - eor r12, r12, r9, ror #19 - eor lr, lr, r8, lsr #10 - eor r12, r12, r9, lsr #10 - add r4, r4, #0x00000022 - add r10, r10, lr - add r4, r4, r12 - ldr r11, [sp, #16*4] - - add r5, r5, #0x00000100 - str r4, [sp, #30*4] - mov lr, r11, ror #7 - str r10, [sp, #29*4] - mov r12, r10, ror #17 - eor lr, lr, r11, ror #18 - eor r12, r12, r10, ror #19 - eor lr, lr, r11, lsr #3 - eor r12, r12, r10, lsr #10 - add r5, r5, lr - ldr lr, [r1, #17*4] - add r5, r5, r12 - - b sha256d_ms_extend_loop2 - -sha256d_ms_extend_coda2: - str r5, [r1, #(44+15)*4] - mov r12, r4, ror #17 - add r11, r11, r6 - mov r6, lr, ror #7 - eor r12, r12, r4, ror #19 - eor r6, r6, lr, ror #18 - eor r12, r12, r4, lsr #10 - eor r6, r6, lr, lsr #3 - add r12, r12, r11 - add r6, r6, r12 - str r6, [r1, #(44+16)*4] - - adr r2, sha256d_ms_h - ldmia r2, {r4-r11} - b sha256d_ms_main_loop2 - -sha256d_ms_h: - .long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a - .long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 - -.macro sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh - ldr r12, [\rw, #(\i)*4] - and r3, \rf, \re - bic lr, \rg, \re - add \rh, \rh, \rd - orr lr, lr, r3 - ldr r3, \ka + (\i)*4 - add \rh, \rh, lr - eor lr, \re, \re, ror #5 - add \rh, \rh, r12 - eor lr, lr, \re, ror #19 - add \rh, \rh, r3 - add \rh, \rh, lr, ror #6 -.endm - -sha256d_ms_finish: - sha256_main_round_red 57, sha256d_ms_k, r1, r6, r11, r8, r9, r10 - sha256_main_round_red 58, sha256d_ms_k, r1, r5, r10, r11, r8, r9 - sha256_main_round_red 59, 
sha256d_ms_k, r1, r4, r9, r10, r11, r8 - ldr r5, [r2, #7*4] - sha256_main_round_red 60, sha256d_ms_k, r1, r7, r8, r9, r10, r11 - - add r11, r11, r5 - str r11, [r0, #7*4] - - add sp, sp, #64*4 -#ifdef __thumb__ - ldmfd sp!, {r4-r11, lr} - bx lr -#else - ldmfd sp!, {r4-r11, pc} -#endif - - -#ifdef __ARM_NEON__ - - .text - .code 32 - .align 2 - .globl sha256_init_4way - .globl _sha256_init_4way -#ifdef __ELF__ - .type sha256_init_4way, %function -#endif -sha256_init_4way: -_sha256_init_4way: - adr r12, sha256_4h - vldmia r12, {q8-q15} - vstmia r0, {q8-q15} - bx lr - .align 4 -sha256_4h: - .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667 - .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85 - .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372 - .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a - .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f - .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c - .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab - .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19 - -.macro sha256_4k - .long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98 - .long 0x71374491, 0x71374491, 0x71374491, 0x71374491 - .long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf - .long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5 - .long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b - .long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1 - .long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4 - .long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5 - .long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98 - .long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01 - .long 0x243185be, 0x243185be, 0x243185be, 0x243185be - .long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3 - .long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74 - .long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe - .long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7 - .long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174 - .long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1 - .long 
0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786 - .long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6 - .long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc - .long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f - .long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa - .long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc - .long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da - .long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152 - .long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d - .long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8 - .long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7 - .long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3 - .long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147 - .long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351 - .long 0x14292967, 0x14292967, 0x14292967, 0x14292967 - .long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85 - .long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138 - .long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc - .long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13 - .long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354 - .long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb - .long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e - .long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85 - .long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1 - .long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b - .long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70 - .long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3 - .long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819 - .long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624 - .long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585 - .long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070 - .long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116 - .long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08 - .long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c - .long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5 - .long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3 - .long 0x4ed8aa4a, 
0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a - .long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f - .long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3 - .long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee - .long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f - .long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814 - .long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208 - .long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa - .long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb - .long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7 - .long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2 -.endm - -.macro sha256_4way_extend_doubleround_core i, rr, rw, ra, rb, ry, rz - vadd.u32 q5, q5, \ra - veor.u32 q4, q4, q0 - vshr.u32 q0, \ry, #19 - vshl.u32 q1, \ry, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 \ra, q6, #7 - vshl.u32 q0, q6, #32-7 - veor.u32 q4, q4, q1 - veor.u32 \ra, \ra, q0 - vshr.u32 q1, \ry, #10 - vshr.u32 q0, q6, #18 - veor.u32 q4, q4, q1 - veor.u32 \ra, \ra, q0 - vshl.u32 q1, q6, #32-18 - vshr.u32 q0, q6, #3 - veor.u32 \ra, \ra, q1 - vadd.u32 q4, q4, q5 - veor.u32 \ra, \ra, q0 - vld1.u32 {q5}, [\rr]! - vadd.u32 \ra, \ra, q4 - - vshr.u32 q4, \rz, #17 - vshl.u32 q0, \rz, #32-17 - vadd.u32 q6, q6, \rb - vst1.u32 {\ra}, [\rw]! - veor.u32 q4, q4, q0 - vshr.u32 q0, \rz, #19 - vshl.u32 q1, \rz, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 \rb, q5, #7 - veor.u32 q4, q4, q1 - vshl.u32 q0, q5, #32-7 - vshr.u32 q1, \rz, #10 - veor.u32 \rb, \rb, q0 - vshr.u32 q0, q5, #18 - veor.u32 q4, q4, q1 - veor.u32 \rb, \rb, q0 - vshl.u32 q1, q5, #32-18 - vshr.u32 q0, q5, #3 - veor.u32 \rb, \rb, q1 - vadd.u32 q1, q6, q4 - veor.u32 \rb, \rb, q0 -.endm - -.macro sha256_4way_extend_doubleround_head i, rr, rw, ra, rb, ry, rz - vld1.u32 {q6}, [\rr]! - vshr.u32 q4, \ry, #17 - vshl.u32 q0, \ry, #32-17 - sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz - vld1.u32 {q6}, [\rr]! 
- vadd.u32 \rb, \rb, q1 -.endm - -.macro sha256_4way_extend_doubleround_body i, rr, rw, ra, rb, ry, rz - vshr.u32 q4, \ry, #17 - vshl.u32 q0, \ry, #32-17 - vst1.u32 {\rz}, [\rw]! - sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz - vld1.u32 {q6}, [\rr]! - vadd.u32 \rb, \rb, q1 -.endm - -.macro sha256_4way_extend_doubleround_foot i, rr, rw, ra, rb, ry, rz - vshr.u32 q4, \ry, #17 - vshl.u32 q0, \ry, #32-17 - vst1.u32 {\rz}, [\rw]! - sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz - vadd.u32 \rb, \rb, q1 - vst1.u32 {\rb}, [\rw]! -.endm - -.macro sha256_4way_main_round i, rk, rw, ra, rb, rc, rd, re, rf, rg, rh - vld1.u32 {q8}, [\rw]! - vand.u32 q9, \rf, \re - vbic.u32 q10, \rg, \re - vshr.u32 q11, \re, #5 - vorr.u32 q10, q10, q9 - vld1.u32 {q9}, [\rk]! - vadd.u32 \rh, \rh, q10 - vshl.u32 q12, \re, #32-5 - veor.u32 q10, \re, q11 - vshr.u32 q11, \re, #19 - veor.u32 q10, q10, q12 - vshl.u32 q12, \re, #32-19 - veor.u32 q10, q10, q11 - vadd.u32 \rh, \rh, q8 - veor.u32 q10, q10, q12 - vadd.u32 \rh, \rh, q9 - veor.u32 q9, \ra, \rb - vshr.u32 q11, q10, #6 - vshl.u32 q13, q10, #32-6 - vadd.u32 \rh, \rh, q11 - - vshr.u32 q11, \ra, #11 - vshl.u32 q12, \ra, #32-11 - veor.u32 q8, \ra, q11 - vand.u32 q10, \ra, \rb - veor.u32 q8, q8, q12 - vshr.u32 q11, \ra, #20 - vshl.u32 q12, \ra, #32-20 - veor.u32 q8, q8, q11 - vand.u32 q9, q9, \rc - veor.u32 q8, q8, q12 - vadd.u32 \rh, \rh, q13 - veor.u32 q10, q10, q9 - vshr.u32 q11, q8, #2 - vshl.u32 q12, q8, #32-2 - vadd.u32 q9, \rh, q10 - vadd.u32 q12, q12, q11 - vadd.u32 \rh, \rh, \rd - vadd.u32 \rd, q9, q12 -.endm - -.macro sha256_4way_main_quadround i, rk, rw - sha256_4way_main_round \i+0, \rk, \rw, q0, q1, q2, q3, q4, q5, q6, q7 - sha256_4way_main_round \i+1, \rk, \rw, q3, q0, q1, q2, q7, q4, q5, q6 - sha256_4way_main_round \i+2, \rk, \rw, q2, q3, q0, q1, q6, q7, q4, q5 - sha256_4way_main_round \i+3, \rk, \rw, q1, q2, q3, q0, q5, q6, q7, q4 -.endm - - - .text - .code 32 - .align 2 - .globl 
sha256_transform_4way - .globl _sha256_transform_4way -#ifdef __ELF__ - .type sha256_transform_4way, %function -#endif -sha256_transform_4way: -_sha256_transform_4way: - stmfd sp!, {r4, lr} - vpush {q4-q7} - mov r12, sp - sub sp, sp, #64*16 - bic sp, sp, #63 - cmp r2, #0 - bne sha256_transform_4way_swap - - vldmia r1!, {q0-q7} - vstmia sp, {q0-q7} - add r3, sp, #8*16 - vldmia r1, {q8-q15} - vstmia r3, {q8-q15} - b sha256_transform_4way_extend - -sha256_transform_4way_swap: - vldmia r1!, {q0-q7} - vrev32.8 q0, q0 - vrev32.8 q1, q1 - vrev32.8 q2, q2 - vrev32.8 q3, q3 - vldmia r1, {q8-q15} - vrev32.8 q4, q4 - vrev32.8 q5, q5 - vrev32.8 q6, q6 - vrev32.8 q7, q7 - vstmia sp, {q0-q7} - vrev32.8 q8, q8 - vrev32.8 q9, q9 - vrev32.8 q10, q10 - vrev32.8 q11, q11 - vrev32.8 q12, q12 - vrev32.8 q13, q13 - vrev32.8 q14, q14 - vrev32.8 q15, q15 - add r3, sp, #8*16 - vstmia r3, {q8-q15} - -sha256_transform_4way_extend: - add r1, sp, #1*16 - add r2, sp, #16*16 - vmov.u32 q5, q0 - sha256_4way_extend_doubleround_head 0, r1, r2, q9, q10, q14, q15 - sha256_4way_extend_doubleround_body 2, r1, r2, q11, q12, q9, q10 - sha256_4way_extend_doubleround_body 4, r1, r2, q13, q14, q11, q12 - sha256_4way_extend_doubleround_body 6, r1, r2, q15, q9, q13, q14 - sha256_4way_extend_doubleround_body 8, r1, r2, q10, q11, q15, q9 - sha256_4way_extend_doubleround_body 10, r1, r2, q12, q13, q10, q11 - sha256_4way_extend_doubleround_body 12, r1, r2, q14, q15, q12, q13 - sha256_4way_extend_doubleround_body 14, r1, r2, q9, q10, q14, q15 - sha256_4way_extend_doubleround_body 16, r1, r2, q11, q12, q9, q10 - sha256_4way_extend_doubleround_body 18, r1, r2, q13, q14, q11, q12 - sha256_4way_extend_doubleround_body 20, r1, r2, q15, q9, q13, q14 - sha256_4way_extend_doubleround_body 22, r1, r2, q10, q11, q15, q9 - sha256_4way_extend_doubleround_body 24, r1, r2, q12, q13, q10, q11 - sha256_4way_extend_doubleround_body 26, r1, r2, q14, q15, q12, q13 - sha256_4way_extend_doubleround_body 28, r1, r2, q9, q10, q14, q15 - 
sha256_4way_extend_doubleround_body 30, r1, r2, q11, q12, q9, q10 - sha256_4way_extend_doubleround_body 32, r1, r2, q13, q14, q11, q12 - sha256_4way_extend_doubleround_body 34, r1, r2, q15, q9, q13, q14 - sha256_4way_extend_doubleround_body 36, r1, r2, q10, q11, q15, q9 - sha256_4way_extend_doubleround_body 38, r1, r2, q12, q13, q10, q11 - sha256_4way_extend_doubleround_body 40, r1, r2, q14, q15, q12, q13 - sha256_4way_extend_doubleround_body 42, r1, r2, q9, q10, q14, q15 - sha256_4way_extend_doubleround_body 44, r1, r2, q11, q12, q9, q10 - sha256_4way_extend_doubleround_foot 46, r1, r2, q13, q14, q11, q12 - - vldmia r0, {q0-q7} - adr r4, sha256_transform_4way_4k - b sha256_transform_4way_4k_over - .align 4 -sha256_transform_4way_4k: - sha256_4k -sha256_transform_4way_4k_over: - sha256_4way_main_quadround 0, r4, sp - sha256_4way_main_quadround 4, r4, sp - sha256_4way_main_quadround 8, r4, sp - sha256_4way_main_quadround 12, r4, sp - sha256_4way_main_quadround 16, r4, sp - sha256_4way_main_quadround 20, r4, sp - sha256_4way_main_quadround 24, r4, sp - sha256_4way_main_quadround 28, r4, sp - sha256_4way_main_quadround 32, r4, sp - sha256_4way_main_quadround 36, r4, sp - sha256_4way_main_quadround 40, r4, sp - sha256_4way_main_quadround 44, r4, sp - sha256_4way_main_quadround 48, r4, sp - sha256_4way_main_quadround 52, r4, sp - sha256_4way_main_quadround 56, r4, sp - sha256_4way_main_quadround 60, r4, sp - - vldmia r0, {q8-q15} - vadd.u32 q0, q0, q8 - vadd.u32 q1, q1, q9 - vadd.u32 q2, q2, q10 - vadd.u32 q3, q3, q11 - vadd.u32 q4, q4, q12 - vadd.u32 q5, q5, q13 - vadd.u32 q6, q6, q14 - vadd.u32 q7, q7, q15 - vstmia r0, {q0-q7} - - mov sp, r12 - vpop {q4-q7} - ldmfd sp!, {r4, pc} - - - .text - .code 32 - .align 2 - .globl sha256d_ms_4way - .globl _sha256d_ms_4way -#ifdef __ELF__ - .type sha256d_ms_4way, %function -#endif -sha256d_ms_4way: -_sha256d_ms_4way: - stmfd sp!, {r4, lr} - vpush {q4-q7} - mov r12, sp - sub sp, sp, #64*16 - bic sp, sp, #63 - - add r4, r1, #3*16 
- vld1.u32 {q6}, [r4]! - add r1, r1, #18*16 - vldmia r1, {q11-q13} - cmp r0, r0 - - vshr.u32 q10, q6, #7 - vshl.u32 q0, q6, #32-7 - vshr.u32 q1, q6, #18 - veor.u32 q10, q10, q0 - vshl.u32 q0, q6, #32-18 - veor.u32 q10, q10, q1 - vshr.u32 q1, q6, #3 - veor.u32 q10, q10, q0 - vstmia sp!, {q11-q13} - veor.u32 q4, q10, q1 - vadd.u32 q12, q12, q6 - vadd.u32 q11, q11, q4 - - vshr.u32 q14, q12, #17 - vshr.u32 q4, q11, #17 - vshl.u32 q0, q11, #32-17 - vst1.u32 {q11}, [r1]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q11, #19 - vshl.u32 q1, q11, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q12}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q11, #10 - vshl.u32 q0, q12, #32-17 - veor.u32 q4, q4, q1 - veor.u32 q14, q14, q0 - vadd.u32 q13, q13, q4 - vshr.u32 q0, q12, #19 - vshl.u32 q1, q12, #32-19 - veor.u32 q14, q14, q0 - vst1.u32 {q13}, [r1]! - veor.u32 q14, q14, q1 - vshr.u32 q1, q12, #10 - - vshr.u32 q4, q13, #17 - vshl.u32 q0, q13, #32-17 - veor.u32 q14, q14, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q13, #19 - vshl.u32 q1, q13, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q14}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q13, #10 - vld1.u32 {q15}, [r1] - veor.u32 q4, q4, q1 - vst1.u32 {q15}, [sp]! - vadd.u32 q15, q15, q4 - vshr.u32 q4, q14, #17 - vshl.u32 q0, q14, #32-17 - vshl.u32 q1, q14, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q14, #19 - vst1.u32 {q15}, [r1]! - veor.u32 q4, q4, q0 - vld1.u32 {q9}, [r1] - veor.u32 q4, q4, q1 - vshr.u32 q1, q14, #10 - vst1.u32 {q9}, [sp]! - veor.u32 q5, q4, q1 - - vshr.u32 q4, q15, #17 - vadd.u32 q9, q9, q5 - vshl.u32 q0, q15, #32-17 - vshl.u32 q1, q15, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q15, #19 - vst1.u32 {q9}, [r1]! - veor.u32 q4, q4, q0 - vld1.u32 {q10}, [r1] - veor.u32 q4, q4, q1 - vshr.u32 q1, q15, #10 - vst1.u32 {q10}, [sp]! 
- veor.u32 q4, q4, q1 - vshl.u32 q0, q9, #32-17 - vadd.u32 q10, q10, q4 - vshr.u32 q4, q9, #17 - vshl.u32 q1, q9, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q9, #19 - veor.u32 q4, q4, q1 - vshr.u32 q1, q9, #10 - veor.u32 q4, q4, q0 - vst1.u32 {q10}, [r1]! - veor.u32 q5, q4, q1 - - vshr.u32 q4, q10, #17 - vshl.u32 q0, q10, #32-17 - vadd.u32 q11, q11, q5 - veor.u32 q4, q4, q0 - vshr.u32 q0, q10, #19 - vshl.u32 q1, q10, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q11}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q10, #10 - vshl.u32 q0, q11, #32-17 - veor.u32 q2, q4, q1 - vshr.u32 q4, q11, #17 - vadd.u32 q12, q12, q2 - vshl.u32 q1, q11, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q11, #19 - veor.u32 q4, q4, q1 - vshr.u32 q1, q11, #10 - veor.u32 q4, q4, q0 - vst1.u32 {q12}, [r1]! - veor.u32 q5, q4, q1 - - vshr.u32 q4, q12, #17 - vshl.u32 q0, q12, #32-17 - vadd.u32 q13, q13, q5 - veor.u32 q4, q4, q0 - vshr.u32 q0, q12, #19 - vshl.u32 q1, q12, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q13}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q12, #10 - vshl.u32 q0, q13, #32-17 - veor.u32 q2, q4, q1 - vshr.u32 q4, q13, #17 - vadd.u32 q14, q14, q2 - vshl.u32 q1, q13, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q13, #19 - veor.u32 q4, q4, q1 - vshr.u32 q1, q13, #10 - veor.u32 q4, q4, q0 - vst1.u32 {q14}, [r1]! - veor.u32 q5, q4, q1 - add r4, r4, #12*16 - - vshr.u32 q4, q14, #17 - vshl.u32 q0, q14, #32-17 - vadd.u32 q15, q15, q5 - veor.u32 q4, q4, q0 - vshr.u32 q0, q14, #19 - vshl.u32 q1, q14, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q15}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q14, #10 - vld1.u32 {q2}, [r1] - veor.u32 q4, q4, q1 - vshl.u32 q0, q15, #32-17 - vadd.u32 q9, q9, q4 - vst1.u32 {q2}, [sp]! - vadd.u32 q9, q9, q2 - vshr.u32 q4, q15, #17 - vshr.u32 q2, q15, #19 - veor.u32 q4, q4, q0 - vst1.u32 {q9}, [r1]! - vshl.u32 q1, q15, #32-19 - veor.u32 q4, q4, q2 - vshr.u32 q0, q15, #10 - veor.u32 q4, q4, q1 - vld1.u32 {q5-q6}, [r4]! 
- veor.u32 q4, q4, q0 - vld1.u32 {q2}, [r1] - vadd.u32 q10, q10, q4 - vst1.u32 {q2}, [sp]! - vadd.u32 q10, q10, q2 - - sub sp, sp, #8*16 - -sha256d_ms_4way_extend_loop2: - sha256_4way_extend_doubleround_body 16, r4, r1, q11, q12, q9, q10 - sha256_4way_extend_doubleround_body 18, r4, r1, q13, q14, q11, q12 - sha256_4way_extend_doubleround_body 20, r4, r1, q15, q9, q13, q14 - sha256_4way_extend_doubleround_body 22, r4, r1, q10, q11, q15, q9 - sha256_4way_extend_doubleround_body 24, r4, r1, q12, q13, q10, q11 - sha256_4way_extend_doubleround_body 26, r4, r1, q14, q15, q12, q13 - sha256_4way_extend_doubleround_body 28, r4, r1, q9, q10, q14, q15 - sha256_4way_extend_doubleround_body 30, r4, r1, q11, q12, q9, q10 - sha256_4way_extend_doubleround_body 32, r4, r1, q13, q14, q11, q12 - sha256_4way_extend_doubleround_body 34, r4, r1, q15, q9, q13, q14 - sha256_4way_extend_doubleround_body 36, r4, r1, q10, q11, q15, q9 - sha256_4way_extend_doubleround_body 38, r4, r1, q12, q13, q10, q11 - sha256_4way_extend_doubleround_body 40, r4, r1, q14, q15, q12, q13 - sha256_4way_extend_doubleround_body 42, r4, r1, q9, q10, q14, q15 - sha256_4way_extend_doubleround_body 44, r4, r1, q11, q12, q9, q10 - sha256_4way_extend_doubleround_foot 46, r4, r1, q13, q14, q11, q12 - bne sha256d_ms_4way_extend_coda2 - - vldmia r3!, {q4-q7} - vldmia r3, {q0-q3} - vswp q0, q4 - adr r3, sha256d_ms_4way_4k+3*16 - sub r1, r1, #(64-3)*16 - b sha256d_ms_4way_main_loop1 - - .align 4 -sha256d_ms_4way_4k: - sha256_4k - -sha256d_ms_4way_main_loop2: - sha256_4way_main_round 0, r3, r1, q0, q1, q2, q3, q4, q5, q6, q7 - sha256_4way_main_round 1, r3, r1, q3, q0, q1, q2, q7, q4, q5, q6 - sha256_4way_main_round 2, r3, r1, q2, q3, q0, q1, q6, q7, q4, q5 -sha256d_ms_4way_main_loop1: - sha256_4way_main_round 3, r3, r1, q1, q2, q3, q0, q5, q6, q7, q4 - sha256_4way_main_quadround 4, r3, r1 - sha256_4way_main_quadround 8, r3, r1 - sha256_4way_main_quadround 12, r3, r1 - sha256_4way_main_quadround 16, r3, r1 - 
sha256_4way_main_quadround 20, r3, r1 - sha256_4way_main_quadround 24, r3, r1 - sha256_4way_main_quadround 28, r3, r1 - sha256_4way_main_quadround 32, r3, r1 - sha256_4way_main_quadround 36, r3, r1 - sha256_4way_main_quadround 40, r3, r1 - sha256_4way_main_quadround 44, r3, r1 - sha256_4way_main_quadround 48, r3, r1 - sha256_4way_main_quadround 52, r3, r1 - sha256_4way_main_round 56, r3, r1, q0, q1, q2, q3, q4, q5, q6, q7 - bne sha256d_ms_4way_finish - sha256_4way_main_round 57, r3, r1, q3, q0, q1, q2, q7, q4, q5, q6 - sha256_4way_main_round 58, r3, r1, q2, q3, q0, q1, q6, q7, q4, q5 - sha256_4way_main_round 59, r3, r1, q1, q2, q3, q0, q5, q6, q7, q4 - sha256_4way_main_quadround 60, r3, r1 - - vldmia r2, {q8-q15} - vadd.u32 q0, q0, q8 - vadd.u32 q1, q1, q9 - vadd.u32 q2, q2, q10 - vadd.u32 q3, q3, q11 - vadd.u32 q4, q4, q12 - vadd.u32 q5, q5, q13 - vadd.u32 q6, q6, q14 - vadd.u32 q7, q7, q15 - - vldmia sp, {q8-q15} - sub r1, r1, #(64-18)*16 - vstmia r1, {q8-q10} - add r1, r1, #4*16 - vstmia r1, {q11-q13} - add r1, r1, #8*16 - vstmia r1, {q14-q15} - - vstmia sp, {q0-q7} - vmov.u32 q8, #0x80000000 - vmov.u32 q9, #0 - vmov.u32 q10, #0 - vmov.u32 q11, #0 - vmov.u32 q12, #0 - vmov.u32 q13, #0 - vmov.u32 q14, #0 - vmov.u32 q15, #0x00000100 - add r1, sp, #8*16 - vstmia r1!, {q8-q15} - adds r4, sp, #2*16 - - vshr.u32 q9, q1, #7 - vshl.u32 q2, q1, #32-7 - vshr.u32 q4, q1, #18 - veor.u32 q9, q9, q2 - vshl.u32 q3, q1, #32-18 - veor.u32 q9, q9, q4 - vshr.u32 q2, q1, #3 - veor.u32 q9, q9, q3 - vld1.u32 {q5}, [r4]! - veor.u32 q9, q9, q2 - vmov.u32 q7, #0x00a00000 - vadd.u32 q9, q9, q0 - vshr.u32 q10, q5, #7 - vshl.u32 q0, q5, #32-7 - vshl.u32 q3, q5, #32-18 - veor.u32 q10, q10, q0 - vshr.u32 q0, q5, #18 - veor.u32 q10, q10, q3 - vst1.u32 {q9}, [r1]! - vadd.u32 q3, q1, q7 - veor.u32 q10, q10, q0 - vshr.u32 q0, q5, #3 - vld1.u32 {q6}, [r4]! 
- veor.u32 q10, q10, q0 - - vshr.u32 q4, q9, #17 - vshl.u32 q0, q9, #32-17 - vadd.u32 q10, q10, q3 - veor.u32 q4, q4, q0 - vshr.u32 q0, q9, #19 - vshl.u32 q1, q9, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q11, q6, #7 - vshl.u32 q0, q6, #32-7 - veor.u32 q4, q4, q1 - veor.u32 q11, q11, q0 - vshr.u32 q1, q9, #10 - vshr.u32 q0, q6, #18 - veor.u32 q4, q4, q1 - veor.u32 q11, q11, q0 - vshl.u32 q1, q6, #32-18 - vshr.u32 q0, q6, #3 - veor.u32 q11, q11, q1 - vadd.u32 q4, q4, q5 - veor.u32 q11, q11, q0 - vld1.u32 {q5}, [r4]! - vadd.u32 q11, q11, q4 - vshr.u32 q4, q10, #17 - vshl.u32 q0, q10, #32-17 - vst1.u32 {q10}, [r1]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q10, #19 - vshl.u32 q1, q10, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q12, q5, #7 - veor.u32 q4, q4, q1 - vshl.u32 q0, q5, #32-7 - vshr.u32 q1, q10, #10 - veor.u32 q12, q12, q0 - vshr.u32 q0, q5, #18 - veor.u32 q4, q4, q1 - veor.u32 q12, q12, q0 - vshl.u32 q1, q5, #32-18 - vst1.u32 {q11}, [r1]! - veor.u32 q12, q12, q1 - vshr.u32 q0, q5, #3 - vadd.u32 q1, q6, q4 - veor.u32 q12, q12, q0 - - vshr.u32 q4, q11, #17 - vshl.u32 q0, q11, #32-17 - vadd.u32 q12, q12, q1 - vld1.u32 {q6}, [r4]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q11, #19 - vshl.u32 q1, q11, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q13, q6, #7 - vshl.u32 q0, q6, #32-7 - veor.u32 q4, q4, q1 - veor.u32 q13, q13, q0 - vshr.u32 q1, q11, #10 - vshr.u32 q0, q6, #18 - veor.u32 q4, q4, q1 - veor.u32 q13, q13, q0 - vshl.u32 q1, q6, #32-18 - vshr.u32 q0, q6, #3 - veor.u32 q13, q13, q1 - vadd.u32 q4, q4, q5 - veor.u32 q13, q13, q0 - vld1.u32 {q5}, [r4]! - vadd.u32 q13, q13, q4 - vshr.u32 q4, q12, #17 - vshl.u32 q0, q12, #32-17 - vst1.u32 {q12}, [r1]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q12, #19 - vshl.u32 q1, q12, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q14, q5, #7 - veor.u32 q4, q4, q1 - vshl.u32 q0, q5, #32-7 - vshr.u32 q1, q12, #10 - veor.u32 q14, q14, q0 - vshr.u32 q0, q5, #18 - veor.u32 q4, q4, q1 - veor.u32 q14, q14, q0 - vshl.u32 q1, q5, #32-18 - vst1.u32 {q13}, [r1]! 
- veor.u32 q14, q14, q1 - vshr.u32 q0, q5, #3 - vadd.u32 q1, q6, q4 - veor.u32 q14, q14, q0 - - vshr.u32 q4, q13, #17 - vshl.u32 q0, q13, #32-17 - vadd.u32 q14, q14, q1 - vld1.u32 {q6}, [r4]! - vadd.u32 q5, q5, q15 - veor.u32 q4, q4, q0 - vshr.u32 q0, q13, #19 - vshl.u32 q1, q13, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q15, q6, #7 - vshl.u32 q0, q6, #32-7 - veor.u32 q4, q4, q1 - veor.u32 q15, q15, q0 - vshr.u32 q1, q13, #10 - vshr.u32 q0, q6, #18 - veor.u32 q4, q4, q1 - veor.u32 q15, q15, q0 - vshl.u32 q1, q6, #32-18 - vshr.u32 q0, q6, #3 - veor.u32 q15, q15, q1 - vadd.u32 q4, q4, q5 - veor.u32 q15, q15, q0 - vmov.u32 q5, #0x80000000 - vadd.u32 q15, q15, q4 - vshr.u32 q4, q14, #17 - vshl.u32 q0, q14, #32-17 - vadd.u32 q6, q6, q9 - vst1.u32 {q14}, [r1]! - vmov.u32 q7, #0x11000000 - veor.u32 q4, q4, q0 - vshr.u32 q0, q14, #19 - vshl.u32 q1, q14, #32-19 - vadd.u32 q6, q6, q7 - vmov.u32 q2, #0x00002000 - veor.u32 q4, q4, q0 - vst1.u32 {q15}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q14, #10 - vadd.u32 q6, q6, q2 - veor.u32 q1, q4, q1 - add r4, r4, #8*16 - - vshr.u32 q4, q15, #17 - vshl.u32 q0, q15, #32-17 - vadd.u32 q9, q6, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q15, #19 - vshl.u32 q1, q15, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q9}, [r1]! - vadd.u32 q5, q5, q10 - veor.u32 q4, q4, q1 - vshr.u32 q1, q15, #10 - vshl.u32 q0, q9, #32-17 - veor.u32 q10, q4, q1 - vshr.u32 q4, q9, #17 - vadd.u32 q10, q10, q5 - veor.u32 q4, q4, q0 - vshr.u32 q0, q9, #19 - vshl.u32 q1, q9, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q9, #10 - veor.u32 q4, q4, q1 - vst1.u32 {q10}, [r1]! - veor.u32 q1, q4, q0 - - vshr.u32 q4, q10, #17 - vshl.u32 q0, q10, #32-17 - vadd.u32 q11, q11, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q10, #19 - vshl.u32 q1, q10, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q11}, [r1]! 
- veor.u32 q4, q4, q1 - vshr.u32 q1, q10, #10 - vshl.u32 q0, q11, #32-17 - veor.u32 q1, q4, q1 - vshr.u32 q4, q11, #17 - vadd.u32 q12, q12, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q11, #19 - vshl.u32 q1, q11, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q11, #10 - veor.u32 q4, q4, q1 - vst1.u32 {q12}, [r1]! - veor.u32 q1, q4, q0 - - vshr.u32 q4, q12, #17 - vshl.u32 q0, q12, #32-17 - vadd.u32 q13, q13, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q12, #19 - vshl.u32 q1, q12, #32-19 - veor.u32 q4, q4, q0 - vst1.u32 {q13}, [r1]! - veor.u32 q4, q4, q1 - vshr.u32 q1, q12, #10 - vshl.u32 q0, q13, #32-17 - veor.u32 q1, q4, q1 - vshr.u32 q4, q13, #17 - vadd.u32 q14, q14, q1 - veor.u32 q4, q4, q0 - vshr.u32 q0, q13, #19 - vshl.u32 q1, q13, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q0, q13, #10 - veor.u32 q4, q4, q1 - vst1.u32 {q14}, [r1]! - veor.u32 q4, q4, q0 - vmov.u32 q6, #0x00000100 - vadd.u32 q15, q15, q4 - - vshr.u32 q4, q14, #17 - vshl.u32 q0, q14, #32-17 - vmov.u32 q7, #0x00400000 - vst1.u32 {q15}, [r1]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q14, #19 - vshl.u32 q1, q14, #32-19 - veor.u32 q4, q4, q0 - vadd.u32 q9, q9, q7 - veor.u32 q4, q4, q1 - vshr.u32 q1, q14, #10 - vmov.u32 q2, #0x00000022 - veor.u32 q4, q4, q1 - vadd.u32 q9, q9, q2 - vld1.u32 {q5}, [r4]! - vadd.u32 q9, q9, q4 - vshr.u32 q4, q15, #17 - vshl.u32 q0, q15, #32-17 - vadd.u32 q6, q6, q10 - vst1.u32 {q9}, [r1]! - veor.u32 q4, q4, q0 - vshr.u32 q0, q15, #19 - vshl.u32 q1, q15, #32-19 - veor.u32 q4, q4, q0 - vshr.u32 q10, q5, #7 - veor.u32 q4, q4, q1 - vshl.u32 q0, q5, #32-7 - vshr.u32 q1, q15, #10 - veor.u32 q10, q10, q0 - vshr.u32 q0, q5, #18 - veor.u32 q4, q4, q1 - veor.u32 q10, q10, q0 - vshl.u32 q1, q5, #32-18 - vshr.u32 q0, q5, #3 - veor.u32 q10, q10, q1 - vadd.u32 q1, q6, q4 - veor.u32 q10, q10, q0 - vld1.u32 {q6}, [r4]! 
- vadd.u32 q10, q10, q1 - - b sha256d_ms_4way_extend_loop2 - - .align 4 -sha256d_ms_4way_4h: - .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667 - .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85 - .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372 - .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a - .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f - .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c - .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab - .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19 - -sha256d_ms_4way_extend_coda2: - adr r4, sha256d_ms_4way_4h - mov r1, sp - vldmia r4, {q0-q7} - vmov.u32 q15, q7 - sub r3, r3, #64*16 - b sha256d_ms_4way_main_loop2 - -.macro sha256_4way_main_round_red i, rk, rw, rd, re, rf, rg, rh - vld1.u32 {q8}, [\rw]! - vand.u32 q9, \rf, \re - vbic.u32 q10, \rg, \re - vshr.u32 q11, \re, #5 - vorr.u32 q10, q10, q9 - vshl.u32 q12, \re, #32-5 - vadd.u32 \rh, \rh, q10 - veor.u32 q10, \re, q11 - vshr.u32 q11, \re, #19 - veor.u32 q10, q10, q12 - vshl.u32 q12, \re, #32-19 - veor.u32 q10, q10, q11 - vadd.u32 \rh, \rh, q8 - veor.u32 q10, q10, q12 - vld1.u32 {q9}, [\rk]! 
- vadd.u32 \rh, \rh, \rd - vshr.u32 q11, q10, #6 - vadd.u32 \rh, \rh, q9 - vshl.u32 q13, q10, #32-6 - vadd.u32 \rh, \rh, q11 - vadd.u32 \rh, \rh, q13 -.endm - -sha256d_ms_4way_finish: - sha256_4way_main_round_red 57, r3, r1, q2, q7, q4, q5, q6 - sha256_4way_main_round_red 58, r3, r1, q1, q6, q7, q4, q5 - sha256_4way_main_round_red 59, r3, r1, q0, q5, q6, q7, q4 - sha256_4way_main_round_red 60, r3, r1, q3, q4, q5, q6, q7 - - vadd.u32 q7, q7, q15 - add r0, r0, #7*16 - vst1.u32 {q7}, [r0] - - mov sp, r12 - vpop {q4-q7} - ldmfd sp!, {r4, pc} - - - .text - .code 32 - .align 2 - .globl sha256_use_4way - .globl _sha256_use_4way -#ifdef __ELF__ - .type sha256_use_4way, %function -#endif -sha256_use_4way: -_sha256_use_4way: - mov r0, #1 - bx lr - -#endif /* __ARM_NEON__ */ - -#endif diff --git a/asm/sha2-x64.S b/asm/sha2-x64.S deleted file mode 100644 index 7b815d14..00000000 --- a/asm/sha2-x64.S +++ /dev/null @@ -1,3661 +0,0 @@ -/* - * Copyright 2012-2013 pooler@litecoinpool.org - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#include - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(USE_ASM) && defined(__x86_64__) - - .data - .p2align 7 -sha256_4h: - .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667 - .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85 - .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372 - .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a - .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f - .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c - .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab - .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19 - - .data - .p2align 7 -sha256_4k: - .long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98 - .long 0x71374491, 0x71374491, 0x71374491, 0x71374491 - .long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf - .long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5 - .long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b - .long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1 - .long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4 - .long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5 - .long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98 - .long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01 - .long 0x243185be, 0x243185be, 0x243185be, 0x243185be - .long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3 - .long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74 - .long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe - .long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7 - .long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174 - .long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1 - .long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786 - .long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6 - .long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc - .long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f - .long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa - .long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc - .long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da - .long 
0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152 - .long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d - .long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8 - .long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7 - .long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3 - .long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147 - .long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351 - .long 0x14292967, 0x14292967, 0x14292967, 0x14292967 - .long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85 - .long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138 - .long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc - .long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13 - .long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354 - .long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb - .long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e - .long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85 - .long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1 - .long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b - .long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70 - .long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3 - .long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819 - .long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624 - .long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585 - .long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070 - .long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116 - .long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08 - .long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c - .long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5 - .long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3 - .long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a - .long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f - .long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3 - .long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee - .long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f - .long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814 - .long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208 - .long 0x90befffa, 
0x90befffa, 0x90befffa, 0x90befffa - .long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb - .long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7 - .long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2 - - .data - .p2align 6 -sha256d_4preext2_17: - .long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000 -sha256d_4preext2_23: - .long 0x11002000, 0x11002000, 0x11002000, 0x11002000 -sha256d_4preext2_24: - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 -sha256d_4preext2_30: - .long 0x00400022, 0x00400022, 0x00400022, 0x00400022 - - -#ifdef USE_AVX2 - - .data - .p2align 7 -sha256_8h: - .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667 - .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85 - .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372 - .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a - .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f - .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c - .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab - .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19 - - .data - .p2align 7 -sha256_8k: - .long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98 - .long 0x71374491, 0x71374491, 0x71374491, 0x71374491, 0x71374491, 0x71374491, 0x71374491, 0x71374491 - .long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf - .long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5 - .long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b - .long 0x59f111f1, 0x59f111f1, 
0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1 - .long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4 - .long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5 - .long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98 - .long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01 - .long 0x243185be, 0x243185be, 0x243185be, 0x243185be, 0x243185be, 0x243185be, 0x243185be, 0x243185be - .long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3 - .long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74 - .long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe - .long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7 - .long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174 - .long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1 - .long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786 - .long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6 - .long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc - .long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f - .long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa - .long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc - .long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da - .long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152, 
0x983e5152, 0x983e5152, 0x983e5152 - .long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d - .long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8 - .long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7 - .long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3 - .long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147 - .long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351 - .long 0x14292967, 0x14292967, 0x14292967, 0x14292967, 0x14292967, 0x14292967, 0x14292967, 0x14292967 - .long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85 - .long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138 - .long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc - .long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13 - .long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354 - .long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb - .long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e - .long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85 - .long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1 - .long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b - .long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70 - .long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3 - .long 
0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819 - .long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624 - .long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585 - .long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070 - .long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116 - .long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08 - .long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c - .long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5 - .long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3 - .long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a - .long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f - .long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3 - .long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee - .long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f - .long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814 - .long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208 - .long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa - .long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb - .long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7 - .long 0xc67178f2, 0xc67178f2, 0xc67178f2, 
0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2 - - .data - .p2align 6 -sha256d_8preext2_17: - .long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000 -sha256d_8preext2_23: - .long 0x11002000, 0x11002000, 0x11002000, 0x11002000, 0x11002000, 0x11002000, 0x11002000, 0x11002000 -sha256d_8preext2_24: - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 -sha256d_8preext2_30: - .long 0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022 - -#endif /* USE_AVX2 */ - - - .text - .p2align 6 - .globl sha256_init_4way - .globl _sha256_init_4way -sha256_init_4way: -_sha256_init_4way: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - movq %rcx, %rdi -#endif - movdqa sha256_4h+0(%rip), %xmm0 - movdqa sha256_4h+16(%rip), %xmm1 - movdqa sha256_4h+32(%rip), %xmm2 - movdqa sha256_4h+48(%rip), %xmm3 - movdqu %xmm0, 0(%rdi) - movdqu %xmm1, 16(%rdi) - movdqu %xmm2, 32(%rdi) - movdqu %xmm3, 48(%rdi) - movdqa sha256_4h+64(%rip), %xmm0 - movdqa sha256_4h+80(%rip), %xmm1 - movdqa sha256_4h+96(%rip), %xmm2 - movdqa sha256_4h+112(%rip), %xmm3 - movdqu %xmm0, 64(%rdi) - movdqu %xmm1, 80(%rdi) - movdqu %xmm2, 96(%rdi) - movdqu %xmm3, 112(%rdi) -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rdi -#endif - ret - - -#ifdef USE_AVX2 - .text - .p2align 6 - .globl sha256_init_8way - .globl _sha256_init_8way -sha256_init_8way: -_sha256_init_8way: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - movq %rcx, %rdi -#endif - vpbroadcastd sha256_4h+0(%rip), %ymm0 - vpbroadcastd sha256_4h+16(%rip), %ymm1 - vpbroadcastd sha256_4h+32(%rip), %ymm2 - vpbroadcastd sha256_4h+48(%rip), %ymm3 - vmovdqu %ymm0, 0*32(%rdi) - vmovdqu %ymm1, 1*32(%rdi) - vmovdqu %ymm2, 2*32(%rdi) - vmovdqu %ymm3, 3*32(%rdi) - vpbroadcastd sha256_4h+64(%rip), %ymm0 - vpbroadcastd sha256_4h+80(%rip), %ymm1 - vpbroadcastd sha256_4h+96(%rip), %ymm2 - vpbroadcastd 
sha256_4h+112(%rip), %ymm3 - vmovdqu %ymm0, 4*32(%rdi) - vmovdqu %ymm1, 5*32(%rdi) - vmovdqu %ymm2, 6*32(%rdi) - vmovdqu %ymm3, 7*32(%rdi) -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rdi -#endif - ret -#endif /* USE_AVX2 */ - - -.macro sha256_sse2_extend_round i - movdqa (\i-15)*16(%rax), %xmm0 - movdqa %xmm0, %xmm2 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd (\i-16)*16(%rax), %xmm0 - paddd (\i-7)*16(%rax), %xmm0 - - movdqa %xmm3, %xmm2 - psrld $10, %xmm3 - pslld $13, %xmm2 - movdqa %xmm3, %xmm1 - psrld $7, %xmm1 - pxor %xmm1, %xmm3 - pxor %xmm2, %xmm3 - psrld $2, %xmm1 - pslld $2, %xmm2 - pxor %xmm1, %xmm3 - pxor %xmm2, %xmm3 - paddd %xmm0, %xmm3 - movdqa %xmm3, \i*16(%rax) -.endm - -.macro sha256_sse2_extend_doubleround i - movdqa (\i-15)*16(%rax), %xmm0 - movdqa (\i-14)*16(%rax), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, %xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - - paddd (\i-16)*16(%rax), %xmm0 - paddd (\i-15)*16(%rax), %xmm4 - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - - paddd (\i-7)*16(%rax), %xmm0 - paddd (\i-6)*16(%rax), %xmm4 - - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - - paddd %xmm0, %xmm3 - paddd 
%xmm4, %xmm7 - movdqa %xmm3, \i*16(%rax) - movdqa %xmm7, (\i+1)*16(%rax) -.endm - -.macro sha256_sse2_main_round i - movdqa 16*(\i)(%rax), %xmm6 - - movdqa %xmm0, %xmm1 - movdqa 16(%rsp), %xmm2 - pandn %xmm2, %xmm1 - paddd 32(%rsp), %xmm6 - - movdqa %xmm2, 32(%rsp) - movdqa 0(%rsp), %xmm2 - movdqa %xmm2, 16(%rsp) - - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, 0(%rsp) - - paddd %xmm1, %xmm6 - - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - paddd 16*(\i)(%rcx), %xmm6 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pslld $5, %xmm1 - pxor %xmm2, %xmm0 - pxor %xmm1, %xmm0 - movdqa %xmm5, %xmm1 - paddd %xmm0, %xmm6 - - movdqa %xmm3, %xmm0 - movdqa %xmm4, %xmm3 - movdqa %xmm4, %xmm2 - paddd %xmm6, %xmm0 - pand %xmm5, %xmm2 - pand %xmm7, %xmm1 - pand %xmm7, %xmm4 - pxor %xmm4, %xmm1 - movdqa %xmm5, %xmm4 - movdqa %xmm7, %xmm5 - pxor %xmm2, %xmm1 - paddd %xmm1, %xmm6 - - movdqa %xmm7, %xmm2 - psrld $2, %xmm7 - movdqa %xmm7, %xmm1 - pslld $10, %xmm2 - psrld $11, %xmm1 - pxor %xmm2, %xmm7 - pslld $9, %xmm2 - pxor %xmm1, %xmm7 - psrld $9, %xmm1 - pxor %xmm2, %xmm7 - pslld $11, %xmm2 - pxor %xmm1, %xmm7 - pxor %xmm2, %xmm7 - paddd %xmm6, %xmm7 -.endm - -.macro sha256_sse2_main_quadround i - sha256_sse2_main_round \i+0 - sha256_sse2_main_round \i+1 - sha256_sse2_main_round \i+2 - sha256_sse2_main_round \i+3 -.endm - - -#if defined(USE_AVX) - -.macro sha256_avx_extend_round i - vmovdqa (\i-15)*16(%rax), %xmm0 - vpslld $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm0 - vpsrld $4, %xmm0, %xmm1 - vpxor %xmm1, %xmm0, %xmm0 - vpxor %xmm2, %xmm0, %xmm0 - vpsrld $11, %xmm1, %xmm1 - vpslld $11, %xmm2, %xmm2 - vpxor %xmm1, %xmm0, %xmm0 - vpxor %xmm2, %xmm0, %xmm0 - vpaddd (\i-16)*16(%rax), %xmm0, %xmm0 - vpaddd (\i-7)*16(%rax), %xmm0, %xmm0 - - vpslld $13, %xmm3, %xmm2 - vpsrld $10, %xmm3, %xmm3 - vpsrld $7, %xmm3, %xmm1 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm2, %xmm3, %xmm3 - 
vpsrld $2, %xmm1, %xmm1 - vpslld $2, %xmm2, %xmm2 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm2, %xmm3, %xmm3 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, \i*16(%rax) -.endm - -.macro sha256_avx_extend_doubleround i - vmovdqa (\i-15)*16(%rax), %xmm0 - vmovdqa (\i-14)*16(%rax), %xmm4 - vpslld $14, %xmm0, %xmm2 - vpslld $14, %xmm4, %xmm6 - vpsrld $3, %xmm0, %xmm8 - vpsrld $3, %xmm4, %xmm4 - vpsrld $7, %xmm0, %xmm1 - vpsrld $4, %xmm4, %xmm5 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm5, %xmm4, %xmm4 - vpsrld $11, %xmm1, %xmm1 - vpsrld $11, %xmm5, %xmm5 - vpxor %xmm2, %xmm8, %xmm8 - vpxor %xmm6, %xmm4, %xmm4 - vpslld $11, %xmm2, %xmm2 - vpslld $11, %xmm6, %xmm6 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm5, %xmm4, %xmm4 - vpxor %xmm2, %xmm8, %xmm8 - vpxor %xmm6, %xmm4, %xmm4 - - vpaddd %xmm0, %xmm4, %xmm4 - vpaddd (\i-16)*16(%rax), %xmm8, %xmm0 - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - - vpaddd (\i-7)*16(%rax), %xmm0, %xmm0 - vpaddd (\i-6)*16(%rax), %xmm4, %xmm4 - - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, \i*16(%rax) - vmovdqa %xmm7, (\i+1)*16(%rax) -.endm - -.macro sha256_avx_main_round i, r0, r1, r2, r3, r4, r5, r6, r7 - vpaddd 16*(\i)(%rax), \r0, %xmm6 - vpaddd 16*(\i)(%rcx), %xmm6, %xmm6 - - vpandn \r1, \r3, %xmm1 - vpand \r3, \r2, %xmm2 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - - vpslld $7, \r3, %xmm1 - vpsrld $6, \r3, \r0 - vpsrld $5, \r0, %xmm2 - vpxor %xmm1, \r0, \r0 - vpxor %xmm2, \r0, \r0 - vpslld $14, %xmm1, %xmm1 - vpsrld $14, %xmm2, %xmm2 - vpxor %xmm1, \r0, \r0 - vpxor 
%xmm2, \r0, \r0 - vpslld $5, %xmm1, %xmm1 - vpxor %xmm1, \r0, \r0 - vpaddd \r0, %xmm6, %xmm6 - vpaddd %xmm6, \r4, \r0 - - vpand \r6, \r5, %xmm2 - vpand \r7, \r5, \r4 - vpand \r7, \r6, %xmm1 - vpxor \r4, %xmm1, %xmm1 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - - vpslld $10, \r7, %xmm2 - vpsrld $2, \r7, \r4 - vpsrld $11, \r4, %xmm1 - vpxor %xmm2, \r4, \r4 - vpxor %xmm1, \r4, \r4 - vpslld $9, %xmm2, %xmm2 - vpsrld $9, %xmm1, %xmm1 - vpxor %xmm2, \r4, \r4 - vpxor %xmm1, \r4, \r4 - vpslld $11, %xmm2, %xmm2 - vpxor %xmm2, \r4, \r4 - vpaddd %xmm6, \r4, \r4 -.endm - -.macro sha256_avx_main_quadround i - sha256_avx_main_round \i+0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - sha256_avx_main_round \i+1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - sha256_avx_main_round \i+2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 - sha256_avx_main_round \i+3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 -.endm - -#endif /* USE_AVX */ - - -#if defined(USE_AVX2) - -.macro sha256_avx2_extend_round i - vmovdqa (\i-15)*32(%rax), %ymm0 - vpslld $14, %ymm0, %ymm2 - vpsrld $3, %ymm0, %ymm0 - vpsrld $4, %ymm0, %ymm1 - vpxor %ymm1, %ymm0, %ymm0 - vpxor %ymm2, %ymm0, %ymm0 - vpsrld $11, %ymm1, %ymm1 - vpslld $11, %ymm2, %ymm2 - vpxor %ymm1, %ymm0, %ymm0 - vpxor %ymm2, %ymm0, %ymm0 - vpaddd (\i-16)*32(%rax), %ymm0, %ymm0 - vpaddd (\i-7)*32(%rax), %ymm0, %ymm0 - - vpslld $13, %ymm3, %ymm2 - vpsrld $10, %ymm3, %ymm3 - vpsrld $7, %ymm3, %ymm1 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm2, %ymm3, %ymm3 - vpsrld $2, %ymm1, %ymm1 - vpslld $2, %ymm2, %ymm2 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm2, %ymm3, %ymm3 - vpaddd %ymm0, %ymm3, %ymm3 - vmovdqa %ymm3, \i*32(%rax) -.endm - -.macro sha256_avx2_extend_doubleround i - vmovdqa (\i-15)*32(%rax), %ymm0 - vmovdqa (\i-14)*32(%rax), %ymm4 - vpslld $14, %ymm0, %ymm2 - vpslld $14, %ymm4, %ymm6 - vpsrld $3, %ymm0, %ymm8 - vpsrld $3, %ymm4, %ymm4 - vpsrld $7, %ymm0, %ymm1 - vpsrld $4, %ymm4, %ymm5 - 
vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm5, %ymm4, %ymm4 - vpsrld $11, %ymm1, %ymm1 - vpsrld $11, %ymm5, %ymm5 - vpxor %ymm2, %ymm8, %ymm8 - vpxor %ymm6, %ymm4, %ymm4 - vpslld $11, %ymm2, %ymm2 - vpslld $11, %ymm6, %ymm6 - vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm5, %ymm4, %ymm4 - vpxor %ymm2, %ymm8, %ymm8 - vpxor %ymm6, %ymm4, %ymm4 - - vpaddd %ymm0, %ymm4, %ymm4 - vpaddd (\i-16)*32(%rax), %ymm8, %ymm0 - - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - - vpaddd (\i-7)*32(%rax), %ymm0, %ymm0 - vpaddd (\i-6)*32(%rax), %ymm4, %ymm4 - - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, \i*32(%rax) - vmovdqa %ymm7, (\i+1)*32(%rax) -.endm - -.macro sha256_avx2_main_round i, r0, r1, r2, r3, r4, r5, r6, r7 - vpaddd 32*(\i)(%rax), \r0, %ymm6 - vpaddd 32*(\i)(%rcx), %ymm6, %ymm6 - - vpandn \r1, \r3, %ymm1 - vpand \r3, \r2, %ymm2 - vpxor %ymm2, %ymm1, %ymm1 - vpaddd %ymm1, %ymm6, %ymm6 - - vpslld $7, \r3, %ymm1 - vpsrld $6, \r3, \r0 - vpsrld $5, \r0, %ymm2 - vpxor %ymm1, \r0, \r0 - vpxor %ymm2, \r0, \r0 - vpslld $14, %ymm1, %ymm1 - vpsrld $14, %ymm2, %ymm2 - vpxor %ymm1, \r0, \r0 - vpxor %ymm2, \r0, \r0 - vpslld $5, %ymm1, %ymm1 - vpxor %ymm1, \r0, \r0 - vpaddd \r0, %ymm6, %ymm6 - vpaddd %ymm6, \r4, \r0 - - vpand \r6, \r5, %ymm2 - vpand \r7, \r5, \r4 - vpand \r7, \r6, %ymm1 - vpxor \r4, %ymm1, %ymm1 - vpxor %ymm2, %ymm1, %ymm1 - vpaddd %ymm1, %ymm6, %ymm6 - - vpslld $10, \r7, %ymm2 - vpsrld $2, \r7, \r4 - vpsrld $11, \r4, %ymm1 - vpxor %ymm2, \r4, \r4 - vpxor %ymm1, \r4, \r4 - vpslld $9, %ymm2, %ymm2 - vpsrld $9, %ymm1, 
%ymm1 - vpxor %ymm2, \r4, \r4 - vpxor %ymm1, \r4, \r4 - vpslld $11, %ymm2, %ymm2 - vpxor %ymm2, \r4, \r4 - vpaddd %ymm6, \r4, \r4 -.endm - -.macro sha256_avx2_main_quadround i - sha256_avx2_main_round \i+0, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7 - sha256_avx2_main_round \i+1, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3 - sha256_avx2_main_round \i+2, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4 - sha256_avx2_main_round \i+3, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5 -.endm - -#endif /* USE_AVX2 */ - - -#if defined(USE_XOP) - -.macro sha256_xop_extend_round i - vmovdqa (\i-15)*16(%rax), %xmm0 - vprotd $25, %xmm0, %xmm1 - vprotd $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm0 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm2, %xmm0, %xmm0 - - vpaddd (\i-16)*16(%rax), %xmm0, %xmm0 - vpaddd (\i-7)*16(%rax), %xmm0, %xmm0 - - vprotd $15, %xmm3, %xmm1 - vprotd $13, %xmm3, %xmm2 - vpsrld $10, %xmm3, %xmm3 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm2, %xmm3, %xmm3 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, \i*16(%rax) -.endm - -.macro sha256_xop_extend_doubleround i - vmovdqa (\i-15)*16(%rax), %xmm0 - vmovdqa (\i-14)*16(%rax), %xmm4 - vprotd $25, %xmm0, %xmm1 - vprotd $25, %xmm4, %xmm5 - vprotd $14, %xmm0, %xmm2 - vprotd $14, %xmm4, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $3, %xmm0, %xmm0 - vpsrld $3, %xmm4, %xmm4 - vpxor %xmm2, %xmm0, %xmm0 - vpxor %xmm6, %xmm4, %xmm4 - - vpaddd (\i-16)*16(%rax), %xmm0, %xmm0 - vpaddd (\i-15)*16(%rax), %xmm4, %xmm4 - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - - vpaddd (\i-7)*16(%rax), %xmm0, %xmm0 - vpaddd (\i-6)*16(%rax), %xmm4, %xmm4 - - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, \i*16(%rax) - vmovdqa 
%xmm7, (\i+1)*16(%rax) -.endm - -.macro sha256_xop_main_round i, r0, r1, r2, r3, r4, r5, r6, r7 - vpaddd 16*(\i)(%rax), \r0, %xmm6 - vpaddd 16*(\i)(%rcx), %xmm6, %xmm6 - - vpandn \r1, \r3, %xmm1 - vpand \r3, \r2, %xmm2 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - - vprotd $26, \r3, %xmm1 - vprotd $21, \r3, %xmm2 - vpxor %xmm1, %xmm2, %xmm2 - vprotd $7, \r3, \r0 - vpxor %xmm2, \r0, \r0 - vpaddd \r0, %xmm6, %xmm6 - vpaddd %xmm6, \r4, \r0 - - vpand \r6, \r5, %xmm2 - vpand \r7, \r5, \r4 - vpand \r7, \r6, %xmm1 - vpxor \r4, %xmm1, %xmm1 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - - vprotd $30, \r7, %xmm1 - vprotd $19, \r7, %xmm2 - vpxor %xmm1, %xmm2, %xmm2 - vprotd $10, \r7, \r4 - vpxor %xmm2, \r4, \r4 - vpaddd %xmm6, \r4, \r4 -.endm - -.macro sha256_xop_main_quadround i - sha256_xop_main_round \i+0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - sha256_xop_main_round \i+1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - sha256_xop_main_round \i+2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 - sha256_xop_main_round \i+3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 -.endm - -#endif /* USE_XOP */ - - - .text - .p2align 6 -sha256_transform_4way_core_sse2: - leaq 256(%rsp), %rcx - leaq 48*16(%rcx), %rax - movdqa -2*16(%rcx), %xmm3 - movdqa -1*16(%rcx), %xmm7 -sha256_transform_4way_sse2_extend_loop: - movdqa -15*16(%rcx), %xmm0 - movdqa -14*16(%rcx), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, %xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - - paddd -16*16(%rcx), %xmm0 - paddd -15*16(%rcx), %xmm4 - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 
- psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - - paddd -7*16(%rcx), %xmm0 - paddd -6*16(%rcx), %xmm4 - - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, (%rcx) - movdqa %xmm7, 16(%rcx) - addq $2*16, %rcx - cmpq %rcx, %rax - jne sha256_transform_4way_sse2_extend_loop - - movdqu 0(%rdi), %xmm7 - movdqu 16(%rdi), %xmm5 - movdqu 32(%rdi), %xmm4 - movdqu 48(%rdi), %xmm3 - movdqu 64(%rdi), %xmm0 - movdqu 80(%rdi), %xmm8 - movdqu 96(%rdi), %xmm9 - movdqu 112(%rdi), %xmm10 - - leaq sha256_4k(%rip), %rcx - xorq %rax, %rax -sha256_transform_4way_sse2_main_loop: - movdqa (%rsp, %rax), %xmm6 - paddd (%rcx, %rax), %xmm6 - paddd %xmm10, %xmm6 - - movdqa %xmm0, %xmm1 - movdqa %xmm9, %xmm2 - pandn %xmm2, %xmm1 - - movdqa %xmm2, %xmm10 - movdqa %xmm8, %xmm2 - movdqa %xmm2, %xmm9 - - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, %xmm8 - - paddd %xmm1, %xmm6 - - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $5, %xmm1 - pxor %xmm1, %xmm0 - paddd %xmm0, %xmm6 - - movdqa %xmm3, %xmm0 - paddd %xmm6, %xmm0 - - movdqa %xmm5, %xmm1 - movdqa %xmm4, %xmm3 - movdqa %xmm4, %xmm2 - pand %xmm5, %xmm2 - pand %xmm7, %xmm4 - pand %xmm7, %xmm1 - pxor %xmm4, %xmm1 - movdqa %xmm5, %xmm4 - movdqa %xmm7, %xmm5 - pxor %xmm2, %xmm1 - paddd %xmm1, %xmm6 - - movdqa %xmm7, %xmm2 - psrld $2, %xmm7 - movdqa %xmm7, %xmm1 - pslld $10, %xmm2 - psrld $11, %xmm1 - pxor %xmm2, %xmm7 - pxor %xmm1, %xmm7 - pslld $9, %xmm2 - psrld $9, %xmm1 - pxor %xmm2, %xmm7 - pxor %xmm1, %xmm7 - pslld $11, %xmm2 - pxor %xmm2, 
%xmm7 - paddd %xmm6, %xmm7 - - addq $16, %rax - cmpq $16*64, %rax - jne sha256_transform_4way_sse2_main_loop - jmp sha256_transform_4way_finish - - -#if defined(USE_AVX) - .text - .p2align 6 -sha256_transform_4way_core_avx: - leaq 256(%rsp), %rax - movdqa -2*16(%rax), %xmm3 - movdqa -1*16(%rax), %xmm7 - sha256_avx_extend_doubleround 0 - sha256_avx_extend_doubleround 2 - sha256_avx_extend_doubleround 4 - sha256_avx_extend_doubleround 6 - sha256_avx_extend_doubleround 8 - sha256_avx_extend_doubleround 10 - sha256_avx_extend_doubleround 12 - sha256_avx_extend_doubleround 14 - sha256_avx_extend_doubleround 16 - sha256_avx_extend_doubleround 18 - sha256_avx_extend_doubleround 20 - sha256_avx_extend_doubleround 22 - sha256_avx_extend_doubleround 24 - sha256_avx_extend_doubleround 26 - sha256_avx_extend_doubleround 28 - sha256_avx_extend_doubleround 30 - sha256_avx_extend_doubleround 32 - sha256_avx_extend_doubleround 34 - sha256_avx_extend_doubleround 36 - sha256_avx_extend_doubleround 38 - sha256_avx_extend_doubleround 40 - sha256_avx_extend_doubleround 42 - sha256_avx_extend_doubleround 44 - sha256_avx_extend_doubleround 46 - movdqu 0(%rdi), %xmm7 - movdqu 16(%rdi), %xmm5 - movdqu 32(%rdi), %xmm4 - movdqu 48(%rdi), %xmm3 - movdqu 64(%rdi), %xmm0 - movdqu 80(%rdi), %xmm8 - movdqu 96(%rdi), %xmm9 - movdqu 112(%rdi), %xmm10 - movq %rsp, %rax - leaq sha256_4k(%rip), %rcx - sha256_avx_main_quadround 0 - sha256_avx_main_quadround 4 - sha256_avx_main_quadround 8 - sha256_avx_main_quadround 12 - sha256_avx_main_quadround 16 - sha256_avx_main_quadround 20 - sha256_avx_main_quadround 24 - sha256_avx_main_quadround 28 - sha256_avx_main_quadround 32 - sha256_avx_main_quadround 36 - sha256_avx_main_quadround 40 - sha256_avx_main_quadround 44 - sha256_avx_main_quadround 48 - sha256_avx_main_quadround 52 - sha256_avx_main_quadround 56 - sha256_avx_main_quadround 60 - jmp sha256_transform_4way_finish -#endif /* USE_AVX */ - - -#if defined(USE_XOP) - .text - .p2align 6 
-sha256_transform_4way_core_xop: - leaq 256(%rsp), %rax - movdqa -2*16(%rax), %xmm3 - movdqa -1*16(%rax), %xmm7 - sha256_xop_extend_doubleround 0 - sha256_xop_extend_doubleround 2 - sha256_xop_extend_doubleround 4 - sha256_xop_extend_doubleround 6 - sha256_xop_extend_doubleround 8 - sha256_xop_extend_doubleround 10 - sha256_xop_extend_doubleround 12 - sha256_xop_extend_doubleround 14 - sha256_xop_extend_doubleround 16 - sha256_xop_extend_doubleround 18 - sha256_xop_extend_doubleround 20 - sha256_xop_extend_doubleround 22 - sha256_xop_extend_doubleround 24 - sha256_xop_extend_doubleround 26 - sha256_xop_extend_doubleround 28 - sha256_xop_extend_doubleround 30 - sha256_xop_extend_doubleround 32 - sha256_xop_extend_doubleround 34 - sha256_xop_extend_doubleround 36 - sha256_xop_extend_doubleround 38 - sha256_xop_extend_doubleround 40 - sha256_xop_extend_doubleround 42 - sha256_xop_extend_doubleround 44 - sha256_xop_extend_doubleround 46 - movdqu 0(%rdi), %xmm7 - movdqu 16(%rdi), %xmm5 - movdqu 32(%rdi), %xmm4 - movdqu 48(%rdi), %xmm3 - movdqu 64(%rdi), %xmm0 - movdqu 80(%rdi), %xmm8 - movdqu 96(%rdi), %xmm9 - movdqu 112(%rdi), %xmm10 - movq %rsp, %rax - leaq sha256_4k(%rip), %rcx - sha256_xop_main_quadround 0 - sha256_xop_main_quadround 4 - sha256_xop_main_quadround 8 - sha256_xop_main_quadround 12 - sha256_xop_main_quadround 16 - sha256_xop_main_quadround 20 - sha256_xop_main_quadround 24 - sha256_xop_main_quadround 28 - sha256_xop_main_quadround 32 - sha256_xop_main_quadround 36 - sha256_xop_main_quadround 40 - sha256_xop_main_quadround 44 - sha256_xop_main_quadround 48 - sha256_xop_main_quadround 52 - sha256_xop_main_quadround 56 - sha256_xop_main_quadround 60 - jmp sha256_transform_4way_finish -#endif /* USE_XOP */ - - - .data - .p2align 3 -sha256_transform_4way_core_addr: - .quad 0x0 - -.macro p2bswap_rsi_rsp i - movdqu \i*16(%rsi), %xmm0 - movdqu (\i+1)*16(%rsi), %xmm2 - pshuflw $0xb1, %xmm0, %xmm0 - pshuflw $0xb1, %xmm2, %xmm2 - pshufhw $0xb1, %xmm0, %xmm0 - 
pshufhw $0xb1, %xmm2, %xmm2 - movdqa %xmm0, %xmm1 - movdqa %xmm2, %xmm3 - psrlw $8, %xmm1 - psrlw $8, %xmm3 - psllw $8, %xmm0 - psllw $8, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm3, %xmm2 - movdqa %xmm0, \i*16(%rsp) - movdqa %xmm2, (\i+1)*16(%rsp) -.endm - - .text - .p2align 6 - .globl sha256_transform_4way - .globl _sha256_transform_4way -sha256_transform_4way: -_sha256_transform_4way: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $96, %rsp - movdqa %xmm6, 0(%rsp) - movdqa %xmm7, 16(%rsp) - movdqa %xmm8, 32(%rsp) - movdqa %xmm9, 48(%rsp) - movdqa %xmm10, 64(%rsp) - movdqa %xmm11, 80(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx -#endif - movq %rsp, %r8 - subq $1032, %rsp - andq $-128, %rsp - - testq %rdx, %rdx - jnz sha256_transform_4way_swap - - movdqu 0*16(%rsi), %xmm0 - movdqu 1*16(%rsi), %xmm1 - movdqu 2*16(%rsi), %xmm2 - movdqu 3*16(%rsi), %xmm3 - movdqu 4*16(%rsi), %xmm4 - movdqu 5*16(%rsi), %xmm5 - movdqu 6*16(%rsi), %xmm6 - movdqu 7*16(%rsi), %xmm7 - movdqa %xmm0, 0*16(%rsp) - movdqa %xmm1, 1*16(%rsp) - movdqa %xmm2, 2*16(%rsp) - movdqa %xmm3, 3*16(%rsp) - movdqa %xmm4, 4*16(%rsp) - movdqa %xmm5, 5*16(%rsp) - movdqa %xmm6, 6*16(%rsp) - movdqa %xmm7, 7*16(%rsp) - movdqu 8*16(%rsi), %xmm0 - movdqu 9*16(%rsi), %xmm1 - movdqu 10*16(%rsi), %xmm2 - movdqu 11*16(%rsi), %xmm3 - movdqu 12*16(%rsi), %xmm4 - movdqu 13*16(%rsi), %xmm5 - movdqu 14*16(%rsi), %xmm6 - movdqu 15*16(%rsi), %xmm7 - movdqa %xmm0, 8*16(%rsp) - movdqa %xmm1, 9*16(%rsp) - movdqa %xmm2, 10*16(%rsp) - movdqa %xmm3, 11*16(%rsp) - movdqa %xmm4, 12*16(%rsp) - movdqa %xmm5, 13*16(%rsp) - movdqa %xmm6, 14*16(%rsp) - movdqa %xmm7, 15*16(%rsp) - jmp *sha256_transform_4way_core_addr(%rip) - - .p2align 6 -sha256_transform_4way_swap: - p2bswap_rsi_rsp 0 - p2bswap_rsi_rsp 2 - p2bswap_rsi_rsp 4 - p2bswap_rsi_rsp 6 - p2bswap_rsi_rsp 8 - p2bswap_rsi_rsp 10 - p2bswap_rsi_rsp 12 - p2bswap_rsi_rsp 14 - jmp *sha256_transform_4way_core_addr(%rip) - - .p2align 6 
-sha256_transform_4way_finish: - movdqu 0(%rdi), %xmm2 - movdqu 16(%rdi), %xmm6 - movdqu 32(%rdi), %xmm11 - movdqu 48(%rdi), %xmm1 - paddd %xmm2, %xmm7 - paddd %xmm6, %xmm5 - paddd %xmm11, %xmm4 - paddd %xmm1, %xmm3 - movdqu 64(%rdi), %xmm2 - movdqu 80(%rdi), %xmm6 - movdqu 96(%rdi), %xmm11 - movdqu 112(%rdi), %xmm1 - paddd %xmm2, %xmm0 - paddd %xmm6, %xmm8 - paddd %xmm11, %xmm9 - paddd %xmm1, %xmm10 - - movdqu %xmm7, 0(%rdi) - movdqu %xmm5, 16(%rdi) - movdqu %xmm4, 32(%rdi) - movdqu %xmm3, 48(%rdi) - movdqu %xmm0, 64(%rdi) - movdqu %xmm8, 80(%rdi) - movdqu %xmm9, 96(%rdi) - movdqu %xmm10, 112(%rdi) - - movq %r8, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - movdqa 0(%rsp), %xmm6 - movdqa 16(%rsp), %xmm7 - movdqa 32(%rsp), %xmm8 - movdqa 48(%rsp), %xmm9 - movdqa 64(%rsp), %xmm10 - movdqa 80(%rsp), %xmm11 - addq $96, %rsp - popq %rdi -#endif - ret - - -#ifdef USE_AVX2 - - .text - .p2align 6 -sha256_transform_8way_core_avx2: - leaq 8*64(%rsp), %rax - vmovdqa -2*32(%rax), %ymm3 - vmovdqa -1*32(%rax), %ymm7 - sha256_avx2_extend_doubleround 0 - sha256_avx2_extend_doubleround 2 - sha256_avx2_extend_doubleround 4 - sha256_avx2_extend_doubleround 6 - sha256_avx2_extend_doubleround 8 - sha256_avx2_extend_doubleround 10 - sha256_avx2_extend_doubleround 12 - sha256_avx2_extend_doubleround 14 - sha256_avx2_extend_doubleround 16 - sha256_avx2_extend_doubleround 18 - sha256_avx2_extend_doubleround 20 - sha256_avx2_extend_doubleround 22 - sha256_avx2_extend_doubleround 24 - sha256_avx2_extend_doubleround 26 - sha256_avx2_extend_doubleround 28 - sha256_avx2_extend_doubleround 30 - sha256_avx2_extend_doubleround 32 - sha256_avx2_extend_doubleround 34 - sha256_avx2_extend_doubleround 36 - sha256_avx2_extend_doubleround 38 - sha256_avx2_extend_doubleround 40 - sha256_avx2_extend_doubleround 42 - sha256_avx2_extend_doubleround 44 - sha256_avx2_extend_doubleround 46 - vmovdqu 0*32(%rdi), %ymm7 - vmovdqu 1*32(%rdi), %ymm5 - vmovdqu 2*32(%rdi), %ymm4 - vmovdqu 
3*32(%rdi), %ymm3 - vmovdqu 4*32(%rdi), %ymm0 - vmovdqu 5*32(%rdi), %ymm8 - vmovdqu 6*32(%rdi), %ymm9 - vmovdqu 7*32(%rdi), %ymm10 - movq %rsp, %rax - leaq sha256_8k(%rip), %rcx - sha256_avx2_main_quadround 0 - sha256_avx2_main_quadround 4 - sha256_avx2_main_quadround 8 - sha256_avx2_main_quadround 12 - sha256_avx2_main_quadround 16 - sha256_avx2_main_quadround 20 - sha256_avx2_main_quadround 24 - sha256_avx2_main_quadround 28 - sha256_avx2_main_quadround 32 - sha256_avx2_main_quadround 36 - sha256_avx2_main_quadround 40 - sha256_avx2_main_quadround 44 - sha256_avx2_main_quadround 48 - sha256_avx2_main_quadround 52 - sha256_avx2_main_quadround 56 - sha256_avx2_main_quadround 60 - jmp sha256_transform_8way_finish - -.macro p2bswap_avx2_rsi_rsp i - vmovdqu \i*32(%rsi), %ymm0 - vmovdqu (\i+1)*32(%rsi), %ymm2 - vpshuflw $0xb1, %ymm0, %ymm0 - vpshuflw $0xb1, %ymm2, %ymm2 - vpshufhw $0xb1, %ymm0, %ymm0 - vpshufhw $0xb1, %ymm2, %ymm2 - vpsrlw $8, %ymm0, %ymm1 - vpsrlw $8, %ymm2, %ymm3 - vpsllw $8, %ymm0, %ymm0 - vpsllw $8, %ymm2, %ymm2 - vpxor %ymm1, %ymm0, %ymm0 - vpxor %ymm3, %ymm2, %ymm2 - vmovdqa %ymm0, \i*32(%rsp) - vmovdqa %ymm2, (\i+1)*32(%rsp) -.endm - - .text - .p2align 6 - .globl sha256_transform_8way - .globl _sha256_transform_8way -sha256_transform_8way: -_sha256_transform_8way: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $96, %rsp - vmovdqa %xmm6, 0(%rsp) - vmovdqa %xmm7, 16(%rsp) - vmovdqa %xmm8, 32(%rsp) - vmovdqa %xmm9, 48(%rsp) - vmovdqa %xmm10, 64(%rsp) - vmovdqa %xmm11, 80(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx -#endif - movq %rsp, %r8 - subq $64*32, %rsp - andq $-128, %rsp - - testq %rdx, %rdx - jnz sha256_transform_8way_swap - - vmovdqu 0*32(%rsi), %ymm0 - vmovdqu 1*32(%rsi), %ymm1 - vmovdqu 2*32(%rsi), %ymm2 - vmovdqu 3*32(%rsi), %ymm3 - vmovdqu 4*32(%rsi), %ymm4 - vmovdqu 5*32(%rsi), %ymm5 - vmovdqu 6*32(%rsi), %ymm6 - vmovdqu 7*32(%rsi), %ymm7 - vmovdqa %ymm0, 0*32(%rsp) - vmovdqa %ymm1, 
1*32(%rsp) - vmovdqa %ymm2, 2*32(%rsp) - vmovdqa %ymm3, 3*32(%rsp) - vmovdqa %ymm4, 4*32(%rsp) - vmovdqa %ymm5, 5*32(%rsp) - vmovdqa %ymm6, 6*32(%rsp) - vmovdqa %ymm7, 7*32(%rsp) - vmovdqu 8*32(%rsi), %ymm0 - vmovdqu 9*32(%rsi), %ymm1 - vmovdqu 10*32(%rsi), %ymm2 - vmovdqu 11*32(%rsi), %ymm3 - vmovdqu 12*32(%rsi), %ymm4 - vmovdqu 13*32(%rsi), %ymm5 - vmovdqu 14*32(%rsi), %ymm6 - vmovdqu 15*32(%rsi), %ymm7 - vmovdqa %ymm0, 8*32(%rsp) - vmovdqa %ymm1, 9*32(%rsp) - vmovdqa %ymm2, 10*32(%rsp) - vmovdqa %ymm3, 11*32(%rsp) - vmovdqa %ymm4, 12*32(%rsp) - vmovdqa %ymm5, 13*32(%rsp) - vmovdqa %ymm6, 14*32(%rsp) - vmovdqa %ymm7, 15*32(%rsp) - jmp sha256_transform_8way_core_avx2 - - .p2align 6 -sha256_transform_8way_swap: - p2bswap_avx2_rsi_rsp 0 - p2bswap_avx2_rsi_rsp 2 - p2bswap_avx2_rsi_rsp 4 - p2bswap_avx2_rsi_rsp 6 - p2bswap_avx2_rsi_rsp 8 - p2bswap_avx2_rsi_rsp 10 - p2bswap_avx2_rsi_rsp 12 - p2bswap_avx2_rsi_rsp 14 - jmp sha256_transform_8way_core_avx2 - - .p2align 6 -sha256_transform_8way_finish: - vmovdqu 0*32(%rdi), %ymm2 - vmovdqu 1*32(%rdi), %ymm6 - vmovdqu 2*32(%rdi), %ymm11 - vmovdqu 3*32(%rdi), %ymm1 - vpaddd %ymm2, %ymm7, %ymm7 - vpaddd %ymm6, %ymm5, %ymm5 - vpaddd %ymm11, %ymm4, %ymm4 - vpaddd %ymm1, %ymm3, %ymm3 - vmovdqu 4*32(%rdi), %ymm2 - vmovdqu 5*32(%rdi), %ymm6 - vmovdqu 6*32(%rdi), %ymm11 - vmovdqu 7*32(%rdi), %ymm1 - vpaddd %ymm2, %ymm0, %ymm0 - vpaddd %ymm6, %ymm8, %ymm8 - vpaddd %ymm11, %ymm9, %ymm9 - vpaddd %ymm1, %ymm10, %ymm10 - - vmovdqu %ymm7, 0*32(%rdi) - vmovdqu %ymm5, 1*32(%rdi) - vmovdqu %ymm4, 2*32(%rdi) - vmovdqu %ymm3, 3*32(%rdi) - vmovdqu %ymm0, 4*32(%rdi) - vmovdqu %ymm8, 5*32(%rdi) - vmovdqu %ymm9, 6*32(%rdi) - vmovdqu %ymm10, 7*32(%rdi) - - movq %r8, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - vmovdqa 0(%rsp), %xmm6 - vmovdqa 16(%rsp), %xmm7 - vmovdqa 32(%rsp), %xmm8 - vmovdqa 48(%rsp), %xmm9 - vmovdqa 64(%rsp), %xmm10 - vmovdqa 80(%rsp), %xmm11 - addq $96, %rsp - popq %rdi -#endif - ret - -#endif /* USE_AVX2 */ - 
- - .data - .p2align 3 -sha256d_ms_4way_addr: - .quad 0x0 - - .text - .p2align 6 - .globl sha256d_ms_4way - .globl _sha256d_ms_4way -sha256d_ms_4way: -_sha256d_ms_4way: - jmp *sha256d_ms_4way_addr(%rip) - - - .p2align 6 -sha256d_ms_4way_sse2: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $32, %rsp - movdqa %xmm6, 0(%rsp) - movdqa %xmm7, 16(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx - movq %r9, %rcx -#endif - subq $8+67*16, %rsp - - leaq 256(%rsi), %rax - -sha256d_ms_4way_sse2_extend_loop1: - movdqa 3*16(%rsi), %xmm0 - movdqa 2*16(%rax), %xmm3 - movdqa 3*16(%rax), %xmm7 - movdqa %xmm3, 5*16(%rsp) - movdqa %xmm7, 6*16(%rsp) - movdqa %xmm0, %xmm2 - paddd %xmm0, %xmm7 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd %xmm0, %xmm3 - movdqa %xmm3, 2*16(%rax) - movdqa %xmm7, 3*16(%rax) - - movdqa 4*16(%rax), %xmm0 - movdqa %xmm0, 7*16(%rsp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - movdqa %xmm3, 4*16(%rax) - movdqa %xmm7, 5*16(%rax) - - movdqa 6*16(%rax), %xmm0 - movdqa 7*16(%rax), %xmm4 - movdqa %xmm0, 9*16(%rsp) - movdqa %xmm4, 10*16(%rsp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - 
pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 6*16(%rax) - movdqa %xmm7, 7*16(%rax) - - movdqa 8*16(%rax), %xmm0 - movdqa 2*16(%rax), %xmm4 - movdqa %xmm0, 11*16(%rsp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 8*16(%rax) - movdqa %xmm7, 9*16(%rax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 3*16(%rax), %xmm3 - paddd 4*16(%rax), %xmm7 - movdqa %xmm3, 10*16(%rax) - movdqa %xmm7, 11*16(%rax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 5*16(%rax), %xmm3 - paddd 6*16(%rax), %xmm7 - movdqa %xmm3, 12*16(%rax) - movdqa %xmm7, 13*16(%rax) - - movdqa 14*16(%rax), %xmm0 - movdqa 15*16(%rax), %xmm4 - movdqa %xmm0, 17*16(%rsp) - movdqa %xmm4, 
18*16(%rsp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 7*16(%rax), %xmm0 - paddd 8*16(%rax), %xmm4 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 14*16(%rax) - movdqa %xmm7, 15*16(%rax) - -sha256d_ms_4way_sse2_extend_loop2: - sha256_sse2_extend_doubleround 16 - sha256_sse2_extend_doubleround 18 - sha256_sse2_extend_doubleround 20 - sha256_sse2_extend_doubleround 22 - sha256_sse2_extend_doubleround 24 - sha256_sse2_extend_doubleround 26 - sha256_sse2_extend_doubleround 28 - sha256_sse2_extend_doubleround 30 - sha256_sse2_extend_doubleround 32 - sha256_sse2_extend_doubleround 34 - sha256_sse2_extend_doubleround 36 - sha256_sse2_extend_doubleround 38 - sha256_sse2_extend_doubleround 40 - sha256_sse2_extend_doubleround 42 - jz sha256d_ms_4way_sse2_extend_coda2 - sha256_sse2_extend_doubleround 44 - sha256_sse2_extend_doubleround 46 - - movdqa 0(%rcx), %xmm3 - movdqa 16(%rcx), %xmm0 - movdqa 32(%rcx), %xmm1 - movdqa 48(%rcx), %xmm2 - movdqa 64(%rcx), %xmm6 - movdqa 80(%rcx), %xmm7 - movdqa 96(%rcx), %xmm5 - movdqa 112(%rcx), %xmm4 - movdqa %xmm1, 0(%rsp) - movdqa %xmm2, 16(%rsp) - movdqa %xmm6, 32(%rsp) - - movq %rsi, %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_sse2_main_loop1 - -sha256d_ms_4way_sse2_main_loop2: - sha256_sse2_main_round 0 - sha256_sse2_main_round 1 - sha256_sse2_main_round 2 -sha256d_ms_4way_sse2_main_loop1: - sha256_sse2_main_round 3 - sha256_sse2_main_quadround 4 - sha256_sse2_main_quadround 8 - sha256_sse2_main_quadround 12 - sha256_sse2_main_quadround 16 - sha256_sse2_main_quadround 20 - sha256_sse2_main_quadround 24 - 
sha256_sse2_main_quadround 28 - sha256_sse2_main_quadround 32 - sha256_sse2_main_quadround 36 - sha256_sse2_main_quadround 40 - sha256_sse2_main_quadround 44 - sha256_sse2_main_quadround 48 - sha256_sse2_main_quadround 52 - sha256_sse2_main_round 56 - jz sha256d_ms_4way_sse2_finish - sha256_sse2_main_round 57 - sha256_sse2_main_round 58 - sha256_sse2_main_round 59 - sha256_sse2_main_quadround 60 - - movdqa 5*16(%rsp), %xmm1 - movdqa 6*16(%rsp), %xmm2 - movdqa 7*16(%rsp), %xmm6 - movdqa %xmm1, 18*16(%rsi) - movdqa %xmm2, 19*16(%rsi) - movdqa %xmm6, 20*16(%rsi) - movdqa 9*16(%rsp), %xmm1 - movdqa 10*16(%rsp), %xmm2 - movdqa 11*16(%rsp), %xmm6 - movdqa %xmm1, 22*16(%rsi) - movdqa %xmm2, 23*16(%rsi) - movdqa %xmm6, 24*16(%rsi) - movdqa 17*16(%rsp), %xmm1 - movdqa 18*16(%rsp), %xmm2 - movdqa %xmm1, 30*16(%rsi) - movdqa %xmm2, 31*16(%rsi) - - movdqa 0(%rsp), %xmm1 - movdqa 16(%rsp), %xmm2 - movdqa 32(%rsp), %xmm6 - paddd 0(%rdx), %xmm7 - paddd 16(%rdx), %xmm5 - paddd 32(%rdx), %xmm4 - paddd 48(%rdx), %xmm3 - paddd 64(%rdx), %xmm0 - paddd 80(%rdx), %xmm1 - paddd 96(%rdx), %xmm2 - paddd 112(%rdx), %xmm6 - - movdqa %xmm7, 48+0(%rsp) - movdqa %xmm5, 48+16(%rsp) - movdqa %xmm4, 48+32(%rsp) - movdqa %xmm3, 48+48(%rsp) - movdqa %xmm0, 48+64(%rsp) - movdqa %xmm1, 48+80(%rsp) - movdqa %xmm2, 48+96(%rsp) - movdqa %xmm6, 48+112(%rsp) - - pxor %xmm0, %xmm0 - movq $0x8000000000000100, %rax - movd %rax, %xmm1 - pshufd $0x55, %xmm1, %xmm2 - pshufd $0x00, %xmm1, %xmm1 - movdqa %xmm2, 48+128(%rsp) - movdqa %xmm0, 48+144(%rsp) - movdqa %xmm0, 48+160(%rsp) - movdqa %xmm0, 48+176(%rsp) - movdqa %xmm0, 48+192(%rsp) - movdqa %xmm0, 48+208(%rsp) - movdqa %xmm0, 48+224(%rsp) - movdqa %xmm1, 48+240(%rsp) - - leaq 19*16(%rsp), %rax - cmpq %rax, %rax - - movdqa -15*16(%rax), %xmm0 - movdqa -14*16(%rax), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, 
%xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - paddd -16*16(%rax), %xmm0 - paddd -15*16(%rax), %xmm4 - paddd sha256d_4preext2_17(%rip), %xmm4 - movdqa %xmm0, %xmm3 - movdqa %xmm4, %xmm7 - movdqa %xmm3, 0*16(%rax) - movdqa %xmm7, 1*16(%rax) - - sha256_sse2_extend_doubleround 2 - sha256_sse2_extend_doubleround 4 - - movdqa -9*16(%rax), %xmm0 - movdqa sha256d_4preext2_23(%rip), %xmm4 - movdqa %xmm0, %xmm2 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd -10*16(%rax), %xmm0 - paddd -9*16(%rax), %xmm4 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd -1*16(%rax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - paddd 0*16(%rax), %xmm4 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 6*16(%rax) - movdqa %xmm7, 7*16(%rax) - - movdqa sha256d_4preext2_24(%rip), %xmm0 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 1*16(%rax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd 2*16(%rax), %xmm7 - movdqa %xmm3, 
8*16(%rax) - movdqa %xmm7, 9*16(%rax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 3*16(%rax), %xmm3 - paddd 4*16(%rax), %xmm7 - movdqa %xmm3, 10*16(%rax) - movdqa %xmm7, 11*16(%rax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 5*16(%rax), %xmm3 - paddd 6*16(%rax), %xmm7 - movdqa %xmm3, 12*16(%rax) - movdqa %xmm7, 13*16(%rax) - - movdqa sha256d_4preext2_30(%rip), %xmm0 - movdqa 0*16(%rax), %xmm4 - movdqa %xmm4, %xmm6 - psrld $3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $14, %xmm6 - psrld $4, %xmm5 - pxor %xmm5, %xmm4 - pxor %xmm6, %xmm4 - psrld $11, %xmm5 - pslld $11, %xmm6 - pxor %xmm5, %xmm4 - pxor %xmm6, %xmm4 - paddd -1*16(%rax), %xmm4 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 7*16(%rax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - paddd 8*16(%rax), %xmm4 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 14*16(%rax) - movdqa 
%xmm7, 15*16(%rax) - - jmp sha256d_ms_4way_sse2_extend_loop2 - -sha256d_ms_4way_sse2_extend_coda2: - sha256_sse2_extend_round 44 - - movdqa sha256_4h+0(%rip), %xmm7 - movdqa sha256_4h+16(%rip), %xmm5 - movdqa sha256_4h+32(%rip), %xmm4 - movdqa sha256_4h+48(%rip), %xmm3 - movdqa sha256_4h+64(%rip), %xmm0 - movdqa sha256_4h+80(%rip), %xmm1 - movdqa sha256_4h+96(%rip), %xmm2 - movdqa sha256_4h+112(%rip), %xmm6 - movdqa %xmm1, 0(%rsp) - movdqa %xmm2, 16(%rsp) - movdqa %xmm6, 32(%rsp) - - leaq 48(%rsp), %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_sse2_main_loop2 - -.macro sha256_sse2_main_round_red i, r7 - movdqa 16*\i(%rax), %xmm6 - paddd 16*\i(%rcx), %xmm6 - paddd 32(%rsp), %xmm6 - movdqa %xmm0, %xmm1 - movdqa 16(%rsp), %xmm2 - paddd \r7, %xmm6 - pandn %xmm2, %xmm1 - movdqa %xmm2, 32(%rsp) - movdqa 0(%rsp), %xmm2 - movdqa %xmm2, 16(%rsp) - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, 0(%rsp) - paddd %xmm1, %xmm6 - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $5, %xmm1 - pxor %xmm1, %xmm0 - paddd %xmm6, %xmm0 -.endm - -sha256d_ms_4way_sse2_finish: - sha256_sse2_main_round_red 57, %xmm3 - sha256_sse2_main_round_red 58, %xmm4 - sha256_sse2_main_round_red 59, %xmm5 - sha256_sse2_main_round_red 60, %xmm7 - - paddd sha256_4h+112(%rip), %xmm0 - movdqa %xmm0, 112(%rdi) - - addq $8+67*16, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - movdqa 0(%rsp), %xmm6 - movdqa 16(%rsp), %xmm7 - addq $32, %rsp - popq %rdi -#endif - ret - - -#if defined(USE_AVX) - - .p2align 6 -sha256d_ms_4way_avx: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $80, %rsp - movdqa %xmm6, 0(%rsp) - movdqa %xmm7, 16(%rsp) - movdqa %xmm8, 32(%rsp) - movdqa %xmm9, 48(%rsp) - movdqa %xmm10, 64(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx - movq %r9, %rcx 
-#endif - subq $1032, %rsp - - leaq 256(%rsi), %rax - -sha256d_ms_4way_avx_extend_loop1: - vmovdqa 3*16(%rsi), %xmm0 - vmovdqa 2*16(%rax), %xmm3 - vmovdqa 3*16(%rax), %xmm7 - vmovdqa %xmm3, 2*16(%rsp) - vmovdqa %xmm7, 3*16(%rsp) - vpaddd %xmm0, %xmm7, %xmm7 - vpslld $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm0 - vpsrld $4, %xmm0, %xmm1 - vpxor %xmm1, %xmm0, %xmm0 - vpxor %xmm2, %xmm0, %xmm0 - vpsrld $11, %xmm1, %xmm1 - vpslld $11, %xmm2, %xmm2 - vpxor %xmm1, %xmm0, %xmm0 - vpxor %xmm2, %xmm0, %xmm0 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, 2*16(%rax) - vmovdqa %xmm7, 3*16(%rax) - - vmovdqa 4*16(%rax), %xmm0 - vmovdqa %xmm0, 4*16(%rsp) - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, 4*16(%rax) - vmovdqa %xmm7, 5*16(%rax) - - vmovdqa 6*16(%rax), %xmm0 - vmovdqa 7*16(%rax), %xmm4 - vmovdqa %xmm0, 6*16(%rsp) - vmovdqa %xmm4, 7*16(%rsp) - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 6*16(%rax) - vmovdqa %xmm7, 7*16(%rax) - - vmovdqa 8*16(%rax), %xmm0 - vmovdqa 2*16(%rax), %xmm4 - 
vmovdqa %xmm0, 8*16(%rsp) - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 8*16(%rax) - vmovdqa %xmm7, 9*16(%rax) - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 3*16(%rax), %xmm3, %xmm3 - vpaddd 4*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 10*16(%rax) - vmovdqa %xmm7, 11*16(%rax) - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 5*16(%rax), %xmm3, %xmm3 - vpaddd 6*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 12*16(%rax) - vmovdqa %xmm7, 13*16(%rax) - - vmovdqa 14*16(%rax), %xmm0 - vmovdqa 15*16(%rax), %xmm4 - vmovdqa %xmm0, 14*16(%rsp) - vmovdqa %xmm4, 15*16(%rsp) - vpslld $13, %xmm3, 
%xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpaddd 7*16(%rax), %xmm0, %xmm0 - vpaddd 8*16(%rax), %xmm4, %xmm4 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 14*16(%rax) - vmovdqa %xmm7, 15*16(%rax) - -sha256d_ms_4way_avx_extend_loop2: - sha256_avx_extend_doubleround 16 - sha256_avx_extend_doubleround 18 - sha256_avx_extend_doubleround 20 - sha256_avx_extend_doubleround 22 - sha256_avx_extend_doubleround 24 - sha256_avx_extend_doubleround 26 - sha256_avx_extend_doubleround 28 - sha256_avx_extend_doubleround 30 - sha256_avx_extend_doubleround 32 - sha256_avx_extend_doubleround 34 - sha256_avx_extend_doubleround 36 - sha256_avx_extend_doubleround 38 - sha256_avx_extend_doubleround 40 - sha256_avx_extend_doubleround 42 - jz sha256d_ms_4way_avx_extend_coda2 - sha256_avx_extend_doubleround 44 - sha256_avx_extend_doubleround 46 - - movdqa 0(%rcx), %xmm7 - movdqa 16(%rcx), %xmm8 - movdqa 32(%rcx), %xmm9 - movdqa 48(%rcx), %xmm10 - movdqa 64(%rcx), %xmm0 - movdqa 80(%rcx), %xmm5 - movdqa 96(%rcx), %xmm4 - movdqa 112(%rcx), %xmm3 - - movq %rsi, %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_avx_main_loop1 - -sha256d_ms_4way_avx_main_loop2: - sha256_avx_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - sha256_avx_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - sha256_avx_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 -sha256d_ms_4way_avx_main_loop1: - sha256_avx_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 - 
sha256_avx_main_quadround 4 - sha256_avx_main_quadround 8 - sha256_avx_main_quadround 12 - sha256_avx_main_quadround 16 - sha256_avx_main_quadround 20 - sha256_avx_main_quadround 24 - sha256_avx_main_quadround 28 - sha256_avx_main_quadround 32 - sha256_avx_main_quadround 36 - sha256_avx_main_quadround 40 - sha256_avx_main_quadround 44 - sha256_avx_main_quadround 48 - sha256_avx_main_quadround 52 - sha256_avx_main_round 56, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - jz sha256d_ms_4way_avx_finish - sha256_avx_main_round 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - sha256_avx_main_round 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 - sha256_avx_main_round 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 - sha256_avx_main_quadround 60 - - movdqa 2*16(%rsp), %xmm1 - movdqa 3*16(%rsp), %xmm2 - movdqa 4*16(%rsp), %xmm6 - movdqa %xmm1, 18*16(%rsi) - movdqa %xmm2, 19*16(%rsi) - movdqa %xmm6, 20*16(%rsi) - movdqa 6*16(%rsp), %xmm1 - movdqa 7*16(%rsp), %xmm2 - movdqa 8*16(%rsp), %xmm6 - movdqa %xmm1, 22*16(%rsi) - movdqa %xmm2, 23*16(%rsi) - movdqa %xmm6, 24*16(%rsi) - movdqa 14*16(%rsp), %xmm1 - movdqa 15*16(%rsp), %xmm2 - movdqa %xmm1, 30*16(%rsi) - movdqa %xmm2, 31*16(%rsi) - - paddd 0(%rdx), %xmm7 - paddd 16(%rdx), %xmm5 - paddd 32(%rdx), %xmm4 - paddd 48(%rdx), %xmm3 - paddd 64(%rdx), %xmm0 - paddd 80(%rdx), %xmm8 - paddd 96(%rdx), %xmm9 - paddd 112(%rdx), %xmm10 - - movdqa %xmm7, 0(%rsp) - movdqa %xmm5, 16(%rsp) - movdqa %xmm4, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm0, 64(%rsp) - movdqa %xmm8, 80(%rsp) - movdqa %xmm9, 96(%rsp) - movdqa %xmm10, 112(%rsp) - - pxor %xmm0, %xmm0 - movq $0x8000000000000100, %rax - movd %rax, %xmm1 - pshufd $0x55, %xmm1, %xmm2 - pshufd $0x00, %xmm1, %xmm1 - movdqa %xmm2, 128(%rsp) - movdqa %xmm0, 144(%rsp) - movdqa %xmm0, 160(%rsp) - movdqa %xmm0, 176(%rsp) - movdqa %xmm0, 192(%rsp) - movdqa %xmm0, 208(%rsp) - movdqa %xmm0, 224(%rsp) - movdqa %xmm1, 240(%rsp) - - leaq 256(%rsp), 
%rax - cmpq %rax, %rax - - vmovdqa -15*16(%rax), %xmm0 - vmovdqa -14*16(%rax), %xmm4 - vpslld $14, %xmm0, %xmm2 - vpslld $14, %xmm4, %xmm6 - vpsrld $3, %xmm0, %xmm8 - vpsrld $3, %xmm4, %xmm4 - vpsrld $7, %xmm0, %xmm1 - vpsrld $4, %xmm4, %xmm5 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm5, %xmm4, %xmm4 - vpsrld $11, %xmm1, %xmm1 - vpsrld $11, %xmm5, %xmm5 - vpxor %xmm2, %xmm8, %xmm8 - vpxor %xmm6, %xmm4, %xmm4 - vpslld $11, %xmm2, %xmm2 - vpslld $11, %xmm6, %xmm6 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm5, %xmm4, %xmm4 - vpxor %xmm2, %xmm8, %xmm8 - vpxor %xmm6, %xmm4, %xmm4 - vpaddd %xmm0, %xmm4, %xmm4 - vpaddd -16*16(%rax), %xmm8, %xmm3 - vpaddd sha256d_4preext2_17(%rip), %xmm4, %xmm7 - vmovdqa %xmm3, 0*16(%rax) - vmovdqa %xmm7, 1*16(%rax) - - sha256_avx_extend_doubleround 2 - sha256_avx_extend_doubleround 4 - - vmovdqa -9*16(%rax), %xmm0 - vpslld $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm8 - vpsrld $7, %xmm0, %xmm1 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm2, %xmm8, %xmm8 - vpsrld $11, %xmm1, %xmm1 - vpslld $11, %xmm2, %xmm2 - vpxor %xmm1, %xmm8, %xmm8 - vpxor %xmm2, %xmm8, %xmm8 - vpaddd sha256d_4preext2_23(%rip), %xmm0, %xmm4 - vpaddd -10*16(%rax), %xmm8, %xmm0 - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpaddd -1*16(%rax), %xmm0, %xmm0 - vpaddd 0*16(%rax), %xmm4, %xmm4 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 6*16(%rax) - vmovdqa %xmm7, 7*16(%rax) - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, 
%xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd sha256d_4preext2_24(%rip), %xmm3, %xmm3 - vpaddd 1*16(%rax), %xmm3, %xmm3 - vpaddd 2*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 8*16(%rax) - vmovdqa %xmm7, 9*16(%rax) - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 3*16(%rax), %xmm3, %xmm3 - vpaddd 4*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 10*16(%rax) - vmovdqa %xmm7, 11*16(%rax) - - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 5*16(%rax), %xmm3, %xmm3 - vpaddd 6*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 12*16(%rax) - vmovdqa %xmm7, 13*16(%rax) - - vmovdqa sha256d_4preext2_30(%rip), %xmm0 - vmovdqa 0*16(%rax), %xmm4 - vpslld $14, %xmm4, %xmm6 - vpsrld $3, %xmm4, %xmm4 - vpsrld $4, %xmm4, %xmm5 - vpxor %xmm5, %xmm4, %xmm4 - vpxor %xmm6, %xmm4, %xmm4 - vpsrld $11, %xmm5, %xmm5 - vpslld $11, 
%xmm6, %xmm6 - vpxor %xmm5, %xmm4, %xmm4 - vpxor %xmm6, %xmm4, %xmm4 - vpaddd -1*16(%rax), %xmm4, %xmm4 - vpslld $13, %xmm3, %xmm2 - vpslld $13, %xmm7, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpaddd 7*16(%rax), %xmm0, %xmm0 - vpaddd 8*16(%rax), %xmm4, %xmm4 - vpsrld $7, %xmm3, %xmm1 - vpsrld $7, %xmm7, %xmm5 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpsrld $2, %xmm1, %xmm1 - vpsrld $2, %xmm5, %xmm5 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpslld $2, %xmm2, %xmm2 - vpslld $2, %xmm6, %xmm6 - vpxor %xmm1, %xmm3, %xmm3 - vpxor %xmm5, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 14*16(%rax) - vmovdqa %xmm7, 15*16(%rax) - - jmp sha256d_ms_4way_avx_extend_loop2 - -sha256d_ms_4way_avx_extend_coda2: - sha256_avx_extend_round 44 - - movdqa sha256_4h+0(%rip), %xmm7 - movdqa sha256_4h+16(%rip), %xmm5 - movdqa sha256_4h+32(%rip), %xmm4 - movdqa sha256_4h+48(%rip), %xmm3 - movdqa sha256_4h+64(%rip), %xmm0 - movdqa sha256_4h+80(%rip), %xmm8 - movdqa sha256_4h+96(%rip), %xmm9 - movdqa sha256_4h+112(%rip), %xmm10 - - movq %rsp, %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_avx_main_loop2 - -.macro sha256_avx_main_round_red i, r0, r1, r2, r3, r4 - vpaddd 16*\i(%rax), \r0, %xmm6 - vpaddd 16*\i(%rcx), %xmm6, %xmm6 - vpandn \r1, \r3, %xmm1 - vpand \r3, \r2, %xmm2 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - vpslld $7, \r3, %xmm1 - vpsrld $6, \r3, \r0 - vpsrld $5, \r0, %xmm2 - vpxor %xmm1, \r0, \r0 - vpxor %xmm2, \r0, \r0 - vpslld $14, %xmm1, %xmm1 - vpsrld $14, %xmm2, %xmm2 - vpxor %xmm1, \r0, \r0 - vpxor %xmm2, \r0, \r0 - vpslld $5, %xmm1, %xmm1 - vpxor %xmm1, \r0, \r0 - vpaddd \r0, %xmm6, %xmm6 - vpaddd %xmm6, \r4, \r0 -.endm - -sha256d_ms_4way_avx_finish: - sha256_avx_main_round_red 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4 - sha256_avx_main_round_red 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5 - 
sha256_avx_main_round_red 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7 - sha256_avx_main_round_red 60, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3 - - paddd sha256_4h+112(%rip), %xmm10 - movdqa %xmm10, 112(%rdi) - - addq $1032, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - movdqa 0(%rsp), %xmm6 - movdqa 16(%rsp), %xmm7 - movdqa 32(%rsp), %xmm8 - movdqa 48(%rsp), %xmm9 - movdqa 64(%rsp), %xmm10 - addq $80, %rsp - popq %rdi -#endif - ret - -#endif /* USE_AVX */ - - -#if defined(USE_XOP) - - .p2align 6 -sha256d_ms_4way_xop: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $80, %rsp - movdqa %xmm6, 0(%rsp) - movdqa %xmm7, 16(%rsp) - movdqa %xmm8, 32(%rsp) - movdqa %xmm9, 48(%rsp) - movdqa %xmm10, 64(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx - movq %r9, %rcx -#endif - subq $1032, %rsp - - leaq 256(%rsi), %rax - -sha256d_ms_4way_xop_extend_loop1: - vmovdqa 3*16(%rsi), %xmm0 - vmovdqa 2*16(%rax), %xmm3 - vmovdqa 3*16(%rax), %xmm7 - vmovdqa %xmm3, 2*16(%rsp) - vmovdqa %xmm7, 3*16(%rsp) - vpaddd %xmm0, %xmm7, %xmm7 - vprotd $25, %xmm0, %xmm1 - vprotd $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm0 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm2, %xmm0, %xmm0 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, 2*16(%rax) - vmovdqa %xmm7, 3*16(%rax) - - vmovdqa 4*16(%rax), %xmm0 - vmovdqa %xmm0, 4*16(%rsp) - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vmovdqa %xmm3, 4*16(%rax) - vmovdqa %xmm7, 5*16(%rax) - - vmovdqa 6*16(%rax), %xmm0 - vmovdqa 7*16(%rax), %xmm4 - vmovdqa %xmm0, 6*16(%rsp) - vmovdqa %xmm4, 7*16(%rsp) - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, 
%xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 6*16(%rax) - vmovdqa %xmm7, 7*16(%rax) - - vmovdqa 8*16(%rax), %xmm0 - vmovdqa 2*16(%rax), %xmm4 - vmovdqa %xmm0, 8*16(%rsp) - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 8*16(%rax) - vmovdqa %xmm7, 9*16(%rax) - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 3*16(%rax), %xmm3, %xmm3 - vpaddd 4*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 10*16(%rax) - vmovdqa %xmm7, 11*16(%rax) - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 5*16(%rax), %xmm3, %xmm3 - vpaddd 6*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 12*16(%rax) - vmovdqa %xmm7, 13*16(%rax) - - vmovdqa 14*16(%rax), %xmm0 - vmovdqa 15*16(%rax), %xmm4 - vmovdqa %xmm0, 14*16(%rsp) - vmovdqa %xmm4, 15*16(%rsp) - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpaddd 7*16(%rax), %xmm0, %xmm0 - vpaddd 8*16(%rax), %xmm4, %xmm4 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, 
%xmm7 - vmovdqa %xmm3, 14*16(%rax) - vmovdqa %xmm7, 15*16(%rax) - -sha256d_ms_4way_xop_extend_loop2: - sha256_xop_extend_doubleround 16 - sha256_xop_extend_doubleround 18 - sha256_xop_extend_doubleround 20 - sha256_xop_extend_doubleround 22 - sha256_xop_extend_doubleround 24 - sha256_xop_extend_doubleround 26 - sha256_xop_extend_doubleround 28 - sha256_xop_extend_doubleround 30 - sha256_xop_extend_doubleround 32 - sha256_xop_extend_doubleround 34 - sha256_xop_extend_doubleround 36 - sha256_xop_extend_doubleround 38 - sha256_xop_extend_doubleround 40 - sha256_xop_extend_doubleround 42 - jz sha256d_ms_4way_xop_extend_coda2 - sha256_xop_extend_doubleround 44 - sha256_xop_extend_doubleround 46 - - movdqa 0(%rcx), %xmm7 - movdqa 16(%rcx), %xmm8 - movdqa 32(%rcx), %xmm9 - movdqa 48(%rcx), %xmm10 - movdqa 64(%rcx), %xmm0 - movdqa 80(%rcx), %xmm5 - movdqa 96(%rcx), %xmm4 - movdqa 112(%rcx), %xmm3 - - movq %rsi, %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_xop_main_loop1 - -sha256d_ms_4way_xop_main_loop2: - sha256_xop_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - sha256_xop_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - sha256_xop_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 -sha256d_ms_4way_xop_main_loop1: - sha256_xop_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 - sha256_xop_main_quadround 4 - sha256_xop_main_quadround 8 - sha256_xop_main_quadround 12 - sha256_xop_main_quadround 16 - sha256_xop_main_quadround 20 - sha256_xop_main_quadround 24 - sha256_xop_main_quadround 28 - sha256_xop_main_quadround 32 - sha256_xop_main_quadround 36 - sha256_xop_main_quadround 40 - sha256_xop_main_quadround 44 - sha256_xop_main_quadround 48 - sha256_xop_main_quadround 52 - sha256_xop_main_round 56, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7 - jz sha256d_ms_4way_xop_finish - sha256_xop_main_round 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3 - 
sha256_xop_main_round 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4 - sha256_xop_main_round 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5 - sha256_xop_main_quadround 60 - - movdqa 2*16(%rsp), %xmm1 - movdqa 3*16(%rsp), %xmm2 - movdqa 4*16(%rsp), %xmm6 - movdqa %xmm1, 18*16(%rsi) - movdqa %xmm2, 19*16(%rsi) - movdqa %xmm6, 20*16(%rsi) - movdqa 6*16(%rsp), %xmm1 - movdqa 7*16(%rsp), %xmm2 - movdqa 8*16(%rsp), %xmm6 - movdqa %xmm1, 22*16(%rsi) - movdqa %xmm2, 23*16(%rsi) - movdqa %xmm6, 24*16(%rsi) - movdqa 14*16(%rsp), %xmm1 - movdqa 15*16(%rsp), %xmm2 - movdqa %xmm1, 30*16(%rsi) - movdqa %xmm2, 31*16(%rsi) - - paddd 0(%rdx), %xmm7 - paddd 16(%rdx), %xmm5 - paddd 32(%rdx), %xmm4 - paddd 48(%rdx), %xmm3 - paddd 64(%rdx), %xmm0 - paddd 80(%rdx), %xmm8 - paddd 96(%rdx), %xmm9 - paddd 112(%rdx), %xmm10 - - movdqa %xmm7, 0(%rsp) - movdqa %xmm5, 16(%rsp) - movdqa %xmm4, 32(%rsp) - movdqa %xmm3, 48(%rsp) - movdqa %xmm0, 64(%rsp) - movdqa %xmm8, 80(%rsp) - movdqa %xmm9, 96(%rsp) - movdqa %xmm10, 112(%rsp) - - pxor %xmm0, %xmm0 - movq $0x8000000000000100, %rax - movd %rax, %xmm1 - pshufd $0x55, %xmm1, %xmm2 - pshufd $0x00, %xmm1, %xmm1 - movdqa %xmm2, 128(%rsp) - movdqa %xmm0, 144(%rsp) - movdqa %xmm0, 160(%rsp) - movdqa %xmm0, 176(%rsp) - movdqa %xmm0, 192(%rsp) - movdqa %xmm0, 208(%rsp) - movdqa %xmm0, 224(%rsp) - movdqa %xmm1, 240(%rsp) - - leaq 256(%rsp), %rax - cmpq %rax, %rax - - vmovdqa -15*16(%rax), %xmm0 - vmovdqa -14*16(%rax), %xmm4 - vprotd $25, %xmm0, %xmm1 - vprotd $25, %xmm4, %xmm5 - vprotd $14, %xmm0, %xmm2 - vprotd $14, %xmm4, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $3, %xmm0, %xmm8 - vpsrld $3, %xmm4, %xmm4 - vpxor %xmm2, %xmm8, %xmm8 - vpxor %xmm6, %xmm4, %xmm4 - vpaddd %xmm0, %xmm4, %xmm4 - vpaddd -16*16(%rax), %xmm8, %xmm3 - vpaddd sha256d_4preext2_17(%rip), %xmm4, %xmm7 - vmovdqa %xmm3, 0*16(%rax) - vmovdqa %xmm7, 1*16(%rax) - - sha256_xop_extend_doubleround 2 - sha256_xop_extend_doubleround 4 - - 
vmovdqa -9*16(%rax), %xmm0 - vprotd $25, %xmm0, %xmm1 - vprotd $14, %xmm0, %xmm2 - vpsrld $3, %xmm0, %xmm8 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm2, %xmm8, %xmm8 - vpaddd sha256d_4preext2_23(%rip), %xmm0, %xmm4 - vpaddd -10*16(%rax), %xmm8, %xmm0 - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpaddd -1*16(%rax), %xmm0, %xmm0 - vpaddd 0*16(%rax), %xmm4, %xmm4 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 6*16(%rax) - vmovdqa %xmm7, 7*16(%rax) - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd sha256d_4preext2_24(%rip), %xmm3, %xmm3 - vpaddd 1*16(%rax), %xmm3, %xmm3 - vpaddd 2*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 8*16(%rax) - vmovdqa %xmm7, 9*16(%rax) - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 3*16(%rax), %xmm3, %xmm3 - vpaddd 4*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 10*16(%rax) - vmovdqa %xmm7, 11*16(%rax) - - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd 5*16(%rax), %xmm3, %xmm3 - vpaddd 6*16(%rax), %xmm7, %xmm7 - vmovdqa %xmm3, 12*16(%rax) - vmovdqa %xmm7, 13*16(%rax) - - vmovdqa sha256d_4preext2_30(%rip), 
%xmm0 - vmovdqa 0*16(%rax), %xmm4 - vprotd $25, %xmm4, %xmm5 - vprotd $14, %xmm4, %xmm6 - vpxor %xmm5, %xmm6, %xmm6 - vpsrld $3, %xmm4, %xmm4 - vpxor %xmm6, %xmm4, %xmm4 - vpaddd -1*16(%rax), %xmm4, %xmm4 - vprotd $15, %xmm3, %xmm1 - vprotd $15, %xmm7, %xmm5 - vprotd $13, %xmm3, %xmm2 - vprotd $13, %xmm7, %xmm6 - vpxor %xmm1, %xmm2, %xmm2 - vpxor %xmm5, %xmm6, %xmm6 - vpaddd 7*16(%rax), %xmm0, %xmm0 - vpaddd 8*16(%rax), %xmm4, %xmm4 - vpsrld $10, %xmm3, %xmm3 - vpsrld $10, %xmm7, %xmm7 - vpxor %xmm2, %xmm3, %xmm3 - vpxor %xmm6, %xmm7, %xmm7 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm4, %xmm7, %xmm7 - vmovdqa %xmm3, 14*16(%rax) - vmovdqa %xmm7, 15*16(%rax) - - jmp sha256d_ms_4way_xop_extend_loop2 - -sha256d_ms_4way_xop_extend_coda2: - sha256_xop_extend_round 44 - - movdqa sha256_4h+0(%rip), %xmm7 - movdqa sha256_4h+16(%rip), %xmm5 - movdqa sha256_4h+32(%rip), %xmm4 - movdqa sha256_4h+48(%rip), %xmm3 - movdqa sha256_4h+64(%rip), %xmm0 - movdqa sha256_4h+80(%rip), %xmm8 - movdqa sha256_4h+96(%rip), %xmm9 - movdqa sha256_4h+112(%rip), %xmm10 - - movq %rsp, %rax - leaq sha256_4k(%rip), %rcx - jmp sha256d_ms_4way_xop_main_loop2 - -.macro sha256_xop_main_round_red i, r0, r1, r2, r3, r4 - vpaddd 16*\i(%rax), \r0, %xmm6 - vpaddd 16*\i(%rcx), %xmm6, %xmm6 - vpandn \r1, \r3, %xmm1 - vpand \r3, \r2, %xmm2 - vpxor %xmm2, %xmm1, %xmm1 - vpaddd %xmm1, %xmm6, %xmm6 - vprotd $26, \r3, %xmm1 - vprotd $21, \r3, %xmm2 - vpxor %xmm1, %xmm2, %xmm2 - vprotd $7, \r3, \r0 - vpxor %xmm2, \r0, \r0 - vpaddd \r0, %xmm6, %xmm6 - vpaddd %xmm6, \r4, \r0 -.endm - -sha256d_ms_4way_xop_finish: - sha256_xop_main_round_red 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4 - sha256_xop_main_round_red 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5 - sha256_xop_main_round_red 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7 - sha256_xop_main_round_red 60, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3 - - paddd sha256_4h+112(%rip), %xmm10 - movdqa %xmm10, 112(%rdi) - - addq $1032, %rsp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - 
movdqa 0(%rsp), %xmm6 - movdqa 16(%rsp), %xmm7 - movdqa 32(%rsp), %xmm8 - movdqa 48(%rsp), %xmm9 - movdqa 64(%rsp), %xmm10 - addq $80, %rsp - popq %rdi -#endif - ret - -#endif /* USE_XOP */ - - - .text - .p2align 6 - .globl sha256_use_4way - .globl _sha256_use_4way -sha256_use_4way: -_sha256_use_4way: - pushq %rbx - pushq %rcx - pushq %rdx - -#if defined(USE_AVX) - /* Check for AVX and OSXSAVE support */ - movl $1, %eax - cpuid - andl $0x18000000, %ecx - cmpl $0x18000000, %ecx - jne sha256_use_4way_base - /* Check for XMM and YMM state support */ - xorl %ecx, %ecx - xgetbv - andl $0x00000006, %eax - cmpl $0x00000006, %eax - jne sha256_use_4way_base -#if defined(USE_XOP) - /* Check for XOP support */ - movl $0x80000001, %eax - cpuid - andl $0x00000800, %ecx - jz sha256_use_4way_avx - -sha256_use_4way_xop: - leaq sha256d_ms_4way_xop(%rip), %rcx - leaq sha256_transform_4way_core_xop(%rip), %rdx - jmp sha256_use_4way_done -#endif /* USE_XOP */ - -sha256_use_4way_avx: - leaq sha256d_ms_4way_avx(%rip), %rcx - leaq sha256_transform_4way_core_avx(%rip), %rdx - jmp sha256_use_4way_done -#endif /* USE_AVX */ - -sha256_use_4way_base: - leaq sha256d_ms_4way_sse2(%rip), %rcx - leaq sha256_transform_4way_core_sse2(%rip), %rdx - -sha256_use_4way_done: - movq %rcx, sha256d_ms_4way_addr(%rip) - movq %rdx, sha256_transform_4way_core_addr(%rip) - popq %rdx - popq %rcx - popq %rbx - movl $1, %eax - ret - - -#if defined(USE_AVX2) - - .text - .p2align 6 - .globl sha256d_ms_8way - .globl _sha256d_ms_8way -sha256d_ms_8way: -_sha256d_ms_8way: -sha256d_ms_8way_avx2: -#if defined(_WIN64) || defined(__CYGWIN__) - pushq %rdi - subq $80, %rsp - vmovdqa %xmm6, 0(%rsp) - vmovdqa %xmm7, 16(%rsp) - vmovdqa %xmm8, 32(%rsp) - vmovdqa %xmm9, 48(%rsp) - vmovdqa %xmm10, 64(%rsp) - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi - movq %r8, %rdx - movq %r9, %rcx -#endif - pushq %rbp - movq %rsp, %rbp - subq $64*32, %rsp - andq $-128, %rsp - - leaq 16*32(%rsi), %rax - -sha256d_ms_8way_avx2_extend_loop1: - 
vmovdqa 3*32(%rsi), %ymm0 - vmovdqa 2*32(%rax), %ymm3 - vmovdqa 3*32(%rax), %ymm7 - vmovdqa %ymm3, 2*32(%rsp) - vmovdqa %ymm7, 3*32(%rsp) - vpaddd %ymm0, %ymm7, %ymm7 - vpslld $14, %ymm0, %ymm2 - vpsrld $3, %ymm0, %ymm0 - vpsrld $4, %ymm0, %ymm1 - vpxor %ymm1, %ymm0, %ymm0 - vpxor %ymm2, %ymm0, %ymm0 - vpsrld $11, %ymm1, %ymm1 - vpslld $11, %ymm2, %ymm2 - vpxor %ymm1, %ymm0, %ymm0 - vpxor %ymm2, %ymm0, %ymm0 - vpaddd %ymm0, %ymm3, %ymm3 - vmovdqa %ymm3, 2*32(%rax) - vmovdqa %ymm7, 3*32(%rax) - - vmovdqa 4*32(%rax), %ymm0 - vmovdqa %ymm0, 4*32(%rsp) - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vmovdqa %ymm3, 4*32(%rax) - vmovdqa %ymm7, 5*32(%rax) - - vmovdqa 6*32(%rax), %ymm0 - vmovdqa 7*32(%rax), %ymm4 - vmovdqa %ymm0, 6*32(%rsp) - vmovdqa %ymm4, 7*32(%rsp) - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, 6*32(%rax) - vmovdqa %ymm7, 7*32(%rax) - - vmovdqa 8*32(%rax), %ymm0 - vmovdqa 2*32(%rax), %ymm4 - vmovdqa %ymm0, 8*32(%rsp) - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, 
%ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, 8*32(%rax) - vmovdqa %ymm7, 9*32(%rax) - - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd 3*32(%rax), %ymm3, %ymm3 - vpaddd 4*32(%rax), %ymm7, %ymm7 - vmovdqa %ymm3, 10*32(%rax) - vmovdqa %ymm7, 11*32(%rax) - - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd 5*32(%rax), %ymm3, %ymm3 - vpaddd 6*32(%rax), %ymm7, %ymm7 - vmovdqa %ymm3, 12*32(%rax) - vmovdqa %ymm7, 13*32(%rax) - - vmovdqa 14*32(%rax), %ymm0 - vmovdqa 15*32(%rax), %ymm4 - vmovdqa %ymm0, 14*32(%rsp) - vmovdqa %ymm4, 15*32(%rsp) - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - 
vpaddd 7*32(%rax), %ymm0, %ymm0 - vpaddd 8*32(%rax), %ymm4, %ymm4 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, 14*32(%rax) - vmovdqa %ymm7, 15*32(%rax) - -sha256d_ms_8way_avx2_extend_loop2: - sha256_avx2_extend_doubleround 16 - sha256_avx2_extend_doubleround 18 - sha256_avx2_extend_doubleround 20 - sha256_avx2_extend_doubleround 22 - sha256_avx2_extend_doubleround 24 - sha256_avx2_extend_doubleround 26 - sha256_avx2_extend_doubleround 28 - sha256_avx2_extend_doubleround 30 - sha256_avx2_extend_doubleround 32 - sha256_avx2_extend_doubleround 34 - sha256_avx2_extend_doubleround 36 - sha256_avx2_extend_doubleround 38 - sha256_avx2_extend_doubleround 40 - sha256_avx2_extend_doubleround 42 - jz sha256d_ms_8way_avx2_extend_coda2 - sha256_avx2_extend_doubleround 44 - sha256_avx2_extend_doubleround 46 - - vmovdqa 0(%rcx), %ymm7 - vmovdqa 32(%rcx), %ymm8 - vmovdqa 64(%rcx), %ymm9 - vmovdqa 96(%rcx), %ymm10 - vmovdqa 128(%rcx), %ymm0 - vmovdqa 160(%rcx), %ymm5 - vmovdqa 192(%rcx), %ymm4 - vmovdqa 224(%rcx), %ymm3 - - movq %rsi, %rax - leaq sha256_8k(%rip), %rcx - jmp sha256d_ms_8way_avx2_main_loop1 - -sha256d_ms_8way_avx2_main_loop2: - sha256_avx2_main_round 0, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7 - sha256_avx2_main_round 1, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3 - sha256_avx2_main_round 2, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4 -sha256d_ms_8way_avx2_main_loop1: - sha256_avx2_main_round 3, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5 - sha256_avx2_main_quadround 4 - sha256_avx2_main_quadround 8 - 
sha256_avx2_main_quadround 12 - sha256_avx2_main_quadround 16 - sha256_avx2_main_quadround 20 - sha256_avx2_main_quadround 24 - sha256_avx2_main_quadround 28 - sha256_avx2_main_quadround 32 - sha256_avx2_main_quadround 36 - sha256_avx2_main_quadround 40 - sha256_avx2_main_quadround 44 - sha256_avx2_main_quadround 48 - sha256_avx2_main_quadround 52 - sha256_avx2_main_round 56, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7 - jz sha256d_ms_8way_avx2_finish - sha256_avx2_main_round 57, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3 - sha256_avx2_main_round 58, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4 - sha256_avx2_main_round 59, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5 - sha256_avx2_main_quadround 60 - - vmovdqa 2*32(%rsp), %ymm1 - vmovdqa 3*32(%rsp), %ymm2 - vmovdqa 4*32(%rsp), %ymm6 - vmovdqa %ymm1, 18*32(%rsi) - vmovdqa %ymm2, 19*32(%rsi) - vmovdqa %ymm6, 20*32(%rsi) - vmovdqa 6*32(%rsp), %ymm1 - vmovdqa 7*32(%rsp), %ymm2 - vmovdqa 8*32(%rsp), %ymm6 - vmovdqa %ymm1, 22*32(%rsi) - vmovdqa %ymm2, 23*32(%rsi) - vmovdqa %ymm6, 24*32(%rsi) - vmovdqa 14*32(%rsp), %ymm1 - vmovdqa 15*32(%rsp), %ymm2 - vmovdqa %ymm1, 30*32(%rsi) - vmovdqa %ymm2, 31*32(%rsi) - - vpaddd 0(%rdx), %ymm7, %ymm7 - vpaddd 32(%rdx), %ymm5, %ymm5 - vpaddd 64(%rdx), %ymm4, %ymm4 - vpaddd 96(%rdx), %ymm3, %ymm3 - vpaddd 128(%rdx), %ymm0, %ymm0 - vpaddd 160(%rdx), %ymm8, %ymm8 - vpaddd 192(%rdx), %ymm9, %ymm9 - vpaddd 224(%rdx), %ymm10, %ymm10 - - vmovdqa %ymm7, 0(%rsp) - vmovdqa %ymm5, 32(%rsp) - vmovdqa %ymm4, 64(%rsp) - vmovdqa %ymm3, 96(%rsp) - vmovdqa %ymm0, 128(%rsp) - vmovdqa %ymm8, 160(%rsp) - vmovdqa %ymm9, 192(%rsp) - vmovdqa %ymm10, 224(%rsp) - - vpxor %ymm0, %ymm0, %ymm0 - movq $0x8000000000000100, %rax - vmovd %rax, %xmm1 - vinserti128 $1, %xmm1, %ymm1, %ymm1 - vpshufd $0x55, %ymm1, %ymm2 - vpshufd $0x00, %ymm1, %ymm1 - vmovdqa %ymm2, 8*32(%rsp) - vmovdqa %ymm0, 9*32(%rsp) - vmovdqa %ymm0, 10*32(%rsp) - vmovdqa %ymm0, 11*32(%rsp) - vmovdqa 
%ymm0, 12*32(%rsp) - vmovdqa %ymm0, 13*32(%rsp) - vmovdqa %ymm0, 14*32(%rsp) - vmovdqa %ymm1, 15*32(%rsp) - - leaq 16*32(%rsp), %rax - cmpq %rax, %rax - - vmovdqa -15*32(%rax), %ymm0 - vmovdqa -14*32(%rax), %ymm4 - vpslld $14, %ymm0, %ymm2 - vpslld $14, %ymm4, %ymm6 - vpsrld $3, %ymm0, %ymm8 - vpsrld $3, %ymm4, %ymm4 - vpsrld $7, %ymm0, %ymm1 - vpsrld $4, %ymm4, %ymm5 - vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm5, %ymm4, %ymm4 - vpsrld $11, %ymm1, %ymm1 - vpsrld $11, %ymm5, %ymm5 - vpxor %ymm2, %ymm8, %ymm8 - vpxor %ymm6, %ymm4, %ymm4 - vpslld $11, %ymm2, %ymm2 - vpslld $11, %ymm6, %ymm6 - vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm5, %ymm4, %ymm4 - vpxor %ymm2, %ymm8, %ymm8 - vpxor %ymm6, %ymm4, %ymm4 - vpaddd %ymm0, %ymm4, %ymm4 - vpaddd -16*32(%rax), %ymm8, %ymm3 - vpaddd sha256d_8preext2_17(%rip), %ymm4, %ymm7 - vmovdqa %ymm3, 0*32(%rax) - vmovdqa %ymm7, 1*32(%rax) - - sha256_avx2_extend_doubleround 2 - sha256_avx2_extend_doubleround 4 - - vmovdqa -9*32(%rax), %ymm0 - vpslld $14, %ymm0, %ymm2 - vpsrld $3, %ymm0, %ymm8 - vpsrld $7, %ymm0, %ymm1 - vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm2, %ymm8, %ymm8 - vpsrld $11, %ymm1, %ymm1 - vpslld $11, %ymm2, %ymm2 - vpxor %ymm1, %ymm8, %ymm8 - vpxor %ymm2, %ymm8, %ymm8 - vpaddd sha256d_8preext2_23(%rip), %ymm0, %ymm4 - vpaddd -10*32(%rax), %ymm8, %ymm0 - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpaddd -1*32(%rax), %ymm0, %ymm0 - vpaddd 0*32(%rax), %ymm4, %ymm4 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, 6*32(%rax) - vmovdqa %ymm7, 7*32(%rax) - - vpslld $13, %ymm3, 
%ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd sha256d_8preext2_24(%rip), %ymm3, %ymm3 - vpaddd 1*32(%rax), %ymm3, %ymm3 - vpaddd 2*32(%rax), %ymm7, %ymm7 - vmovdqa %ymm3, 8*32(%rax) - vmovdqa %ymm7, 9*32(%rax) - - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd 3*32(%rax), %ymm3, %ymm3 - vpaddd 4*32(%rax), %ymm7, %ymm7 - vmovdqa %ymm3, 10*32(%rax) - vmovdqa %ymm7, 11*32(%rax) - - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd 5*32(%rax), %ymm3, %ymm3 - vpaddd 6*32(%rax), %ymm7, %ymm7 - vmovdqa %ymm3, 12*32(%rax) - vmovdqa %ymm7, 13*32(%rax) - - vmovdqa sha256d_8preext2_30(%rip), %ymm0 - vmovdqa 0*32(%rax), %ymm4 - vpslld $14, %ymm4, %ymm6 - vpsrld $3, %ymm4, %ymm4 - 
vpsrld $4, %ymm4, %ymm5 - vpxor %ymm5, %ymm4, %ymm4 - vpxor %ymm6, %ymm4, %ymm4 - vpsrld $11, %ymm5, %ymm5 - vpslld $11, %ymm6, %ymm6 - vpxor %ymm5, %ymm4, %ymm4 - vpxor %ymm6, %ymm4, %ymm4 - vpaddd -1*32(%rax), %ymm4, %ymm4 - vpslld $13, %ymm3, %ymm2 - vpslld $13, %ymm7, %ymm6 - vpsrld $10, %ymm3, %ymm3 - vpsrld $10, %ymm7, %ymm7 - vpaddd 7*32(%rax), %ymm0, %ymm0 - vpaddd 8*32(%rax), %ymm4, %ymm4 - vpsrld $7, %ymm3, %ymm1 - vpsrld $7, %ymm7, %ymm5 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpsrld $2, %ymm1, %ymm1 - vpsrld $2, %ymm5, %ymm5 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpslld $2, %ymm2, %ymm2 - vpslld $2, %ymm6, %ymm6 - vpxor %ymm1, %ymm3, %ymm3 - vpxor %ymm5, %ymm7, %ymm7 - vpxor %ymm2, %ymm3, %ymm3 - vpxor %ymm6, %ymm7, %ymm7 - vpaddd %ymm0, %ymm3, %ymm3 - vpaddd %ymm4, %ymm7, %ymm7 - vmovdqa %ymm3, 14*32(%rax) - vmovdqa %ymm7, 15*32(%rax) - - jmp sha256d_ms_8way_avx2_extend_loop2 - -sha256d_ms_8way_avx2_extend_coda2: - sha256_avx2_extend_round 44 - - vmovdqa sha256_8h+0(%rip), %ymm7 - vmovdqa sha256_8h+32(%rip), %ymm5 - vmovdqa sha256_8h+64(%rip), %ymm4 - vmovdqa sha256_8h+96(%rip), %ymm3 - vmovdqa sha256_8h+128(%rip), %ymm0 - vmovdqa sha256_8h+160(%rip), %ymm8 - vmovdqa sha256_8h+192(%rip), %ymm9 - vmovdqa sha256_8h+224(%rip), %ymm10 - - movq %rsp, %rax - leaq sha256_8k(%rip), %rcx - jmp sha256d_ms_8way_avx2_main_loop2 - -.macro sha256_avx2_main_round_red i, r0, r1, r2, r3, r4 - vpaddd 32*\i(%rax), \r0, %ymm6 - vpaddd 32*\i(%rcx), %ymm6, %ymm6 - vpandn \r1, \r3, %ymm1 - vpand \r3, \r2, %ymm2 - vpxor %ymm2, %ymm1, %ymm1 - vpaddd %ymm1, %ymm6, %ymm6 - vpslld $7, \r3, %ymm1 - vpsrld $6, \r3, \r0 - vpsrld $5, \r0, %ymm2 - vpxor %ymm1, \r0, \r0 - vpxor %ymm2, \r0, \r0 - vpslld $14, %ymm1, %ymm1 - vpsrld $14, %ymm2, %ymm2 - vpxor %ymm1, \r0, \r0 - vpxor %ymm2, \r0, \r0 - vpslld $5, %ymm1, %ymm1 - vpxor %ymm1, \r0, \r0 - vpaddd \r0, %ymm6, %ymm6 - vpaddd %ymm6, \r4, \r0 -.endm - -sha256d_ms_8way_avx2_finish: - 
sha256_avx2_main_round_red 57, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4 - sha256_avx2_main_round_red 58, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5 - sha256_avx2_main_round_red 59, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7 - sha256_avx2_main_round_red 60, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3 - - vpaddd sha256_8h+224(%rip), %ymm10, %ymm10 - vmovdqa %ymm10, 224(%rdi) - - movq %rbp, %rsp - popq %rbp -#if defined(_WIN64) || defined(__CYGWIN__) - popq %rsi - vmovdqa 0(%rsp), %xmm6 - vmovdqa 16(%rsp), %xmm7 - vmovdqa 32(%rsp), %xmm8 - vmovdqa 48(%rsp), %xmm9 - vmovdqa 64(%rsp), %xmm10 - addq $80, %rsp - popq %rdi -#endif - ret - - - .text - .p2align 6 - .globl sha256_use_8way - .globl _sha256_use_8way -sha256_use_8way: -_sha256_use_8way: - pushq %rbx - - /* Check for AVX and OSXSAVE support */ - movl $1, %eax - cpuid - andl $0x18000000, %ecx - cmpl $0x18000000, %ecx - jne sha256_use_8way_no - /* Check for AVX2 support */ - movl $7, %eax - xorl %ecx, %ecx - cpuid - andl $0x00000020, %ebx - cmpl $0x00000020, %ebx - jne sha256_use_8way_no - /* Check for XMM and YMM state support */ - xorl %ecx, %ecx - xgetbv - andl $0x00000006, %eax - cmpl $0x00000006, %eax - jne sha256_use_8way_no - -sha256_use_8way_yes: - movl $1, %eax - jmp sha256_use_8way_done - -sha256_use_8way_no: - xorl %eax, %eax - -sha256_use_8way_done: - popq %rbx - ret - -#endif /* USE_AVX2 */ - -#endif diff --git a/asm/sha2-x86.S b/asm/sha2-x86.S deleted file mode 100644 index 5ff4f4b2..00000000 --- a/asm/sha2-x86.S +++ /dev/null @@ -1,1193 +0,0 @@ -/* - * Copyright 2012 pooler@litecoinpool.org - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#include - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(USE_ASM) && defined(__i386__) - - .data - .p2align 7 -sha256_4h: - .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667 - .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85 - .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372 - .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a - .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f - .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c - .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab - .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19 - - .data - .p2align 7 -sha256_4k: - .long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98 - .long 0x71374491, 0x71374491, 0x71374491, 0x71374491 - .long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf - .long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5 - .long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b - .long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1 - .long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4 - .long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5 - .long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98 - .long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01 - .long 0x243185be, 0x243185be, 0x243185be, 0x243185be - .long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3 - .long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74 - .long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe - .long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7 - .long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174 - .long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1 - .long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786 - .long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6 - .long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc - .long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f - .long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa - .long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc - .long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da - .long 
0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152 - .long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d - .long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8 - .long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7 - .long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3 - .long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147 - .long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351 - .long 0x14292967, 0x14292967, 0x14292967, 0x14292967 - .long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85 - .long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138 - .long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc - .long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13 - .long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354 - .long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb - .long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e - .long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85 - .long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1 - .long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b - .long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70 - .long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3 - .long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819 - .long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624 - .long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585 - .long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070 - .long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116 - .long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08 - .long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c - .long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5 - .long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3 - .long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a - .long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f - .long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3 - .long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee - .long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f - .long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814 - .long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208 - .long 0x90befffa, 
0x90befffa, 0x90befffa, 0x90befffa - .long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb - .long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7 - .long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2 - - .data - .p2align 6 -sha256d_4preext2_15: - .long 0x00000100, 0x00000100, 0x00000100, 0x00000100 -sha256d_4preext2_17: - .long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000 -sha256d_4preext2_23: - .long 0x11002000, 0x11002000, 0x11002000, 0x11002000 -sha256d_4preext2_24: - .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 -sha256d_4preext2_30: - .long 0x00400022, 0x00400022, 0x00400022, 0x00400022 - - - .text - .p2align 5 - .globl sha256_init_4way - .globl _sha256_init_4way -sha256_init_4way: -_sha256_init_4way: - movl 4(%esp), %edx - movdqa sha256_4h+0, %xmm0 - movdqa sha256_4h+16, %xmm1 - movdqa sha256_4h+32, %xmm2 - movdqa sha256_4h+48, %xmm3 - movdqu %xmm0, 0(%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, 32(%edx) - movdqu %xmm3, 48(%edx) - movdqa sha256_4h+64, %xmm0 - movdqa sha256_4h+80, %xmm1 - movdqa sha256_4h+96, %xmm2 - movdqa sha256_4h+112, %xmm3 - movdqu %xmm0, 64(%edx) - movdqu %xmm1, 80(%edx) - movdqu %xmm2, 96(%edx) - movdqu %xmm3, 112(%edx) - ret - - -.macro sha256_sse2_extend_round i - movdqa (\i-15)*16(%eax), %xmm0 - movdqa %xmm0, %xmm2 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd (\i-16)*16(%eax), %xmm0 - paddd (\i-7)*16(%eax), %xmm0 - - movdqa %xmm3, %xmm2 - psrld $10, %xmm3 - pslld $13, %xmm2 - movdqa %xmm3, %xmm1 - psrld $7, %xmm1 - pxor %xmm1, %xmm3 - pxor %xmm2, %xmm3 - psrld $2, %xmm1 - pslld $2, %xmm2 - pxor %xmm1, %xmm3 - pxor %xmm2, %xmm3 - paddd %xmm0, %xmm3 - movdqa %xmm3, \i*16(%eax) -.endm - -.macro sha256_sse2_extend_doubleround i - movdqa (\i-15)*16(%eax), %xmm0 - movdqa (\i-14)*16(%eax), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 
- movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, %xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - - paddd (\i-16)*16(%eax), %xmm0 - paddd (\i-15)*16(%eax), %xmm4 - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - - paddd (\i-7)*16(%eax), %xmm0 - paddd (\i-6)*16(%eax), %xmm4 - - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, \i*16(%eax) - movdqa %xmm7, (\i+1)*16(%eax) -.endm - -.macro sha256_sse2_main_round i - movdqa 16*(\i)(%eax), %xmm6 - - movdqa %xmm0, %xmm1 - movdqa 16(%esp), %xmm2 - pandn %xmm2, %xmm1 - paddd 32(%esp), %xmm6 - - movdqa %xmm2, 32(%esp) - movdqa 0(%esp), %xmm2 - movdqa %xmm2, 16(%esp) - - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, 0(%esp) - - paddd %xmm1, %xmm6 - - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - paddd 16*(\i)+sha256_4k, %xmm6 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pslld $5, %xmm1 - pxor %xmm2, %xmm0 - pxor %xmm1, %xmm0 - movdqa %xmm5, %xmm1 - paddd %xmm0, %xmm6 - - movdqa %xmm3, %xmm0 - movdqa %xmm4, %xmm3 - movdqa %xmm4, %xmm2 - paddd %xmm6, %xmm0 - pand %xmm5, %xmm2 - pand %xmm7, %xmm1 - pand %xmm7, %xmm4 - pxor %xmm4, %xmm1 - movdqa %xmm5, %xmm4 - movdqa %xmm7, %xmm5 - pxor %xmm2, %xmm1 - paddd %xmm1, %xmm6 - - movdqa %xmm7, %xmm2 - psrld $2, %xmm7 - movdqa %xmm7, 
%xmm1 - pslld $10, %xmm2 - psrld $11, %xmm1 - pxor %xmm2, %xmm7 - pslld $9, %xmm2 - pxor %xmm1, %xmm7 - psrld $9, %xmm1 - pxor %xmm2, %xmm7 - pslld $11, %xmm2 - pxor %xmm1, %xmm7 - pxor %xmm2, %xmm7 - paddd %xmm6, %xmm7 -.endm - -.macro sha256_sse2_main_quadround i - sha256_sse2_main_round \i+0 - sha256_sse2_main_round \i+1 - sha256_sse2_main_round \i+2 - sha256_sse2_main_round \i+3 -.endm - - -.macro p2bswap_esi_esp i - movdqu \i*16(%esi), %xmm0 - movdqu (\i+1)*16(%esi), %xmm2 - pshuflw $0xb1, %xmm0, %xmm0 - pshuflw $0xb1, %xmm2, %xmm2 - pshufhw $0xb1, %xmm0, %xmm0 - pshufhw $0xb1, %xmm2, %xmm2 - movdqa %xmm0, %xmm1 - movdqa %xmm2, %xmm3 - psrlw $8, %xmm1 - psrlw $8, %xmm3 - psllw $8, %xmm0 - psllw $8, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm3, %xmm2 - movdqa %xmm0, (\i+3)*16(%esp) - movdqa %xmm2, (\i+4)*16(%esp) -.endm - - .text - .p2align 5 - .globl sha256_transform_4way - .globl _sha256_transform_4way -sha256_transform_4way: -_sha256_transform_4way: - pushl %edi - pushl %esi - movl 12(%esp), %edi - movl 16(%esp), %esi - movl 20(%esp), %ecx - movl %esp, %edx - subl $67*16, %esp - andl $-128, %esp - - testl %ecx, %ecx - jnz sha256_transform_4way_swap - - movdqu 0*16(%esi), %xmm0 - movdqu 1*16(%esi), %xmm1 - movdqu 2*16(%esi), %xmm2 - movdqu 3*16(%esi), %xmm3 - movdqu 4*16(%esi), %xmm4 - movdqu 5*16(%esi), %xmm5 - movdqu 6*16(%esi), %xmm6 - movdqu 7*16(%esi), %xmm7 - movdqa %xmm0, 3*16(%esp) - movdqa %xmm1, 4*16(%esp) - movdqa %xmm2, 5*16(%esp) - movdqa %xmm3, 6*16(%esp) - movdqa %xmm4, 7*16(%esp) - movdqa %xmm5, 8*16(%esp) - movdqa %xmm6, 9*16(%esp) - movdqa %xmm7, 10*16(%esp) - movdqu 8*16(%esi), %xmm0 - movdqu 9*16(%esi), %xmm1 - movdqu 10*16(%esi), %xmm2 - movdqu 11*16(%esi), %xmm3 - movdqu 12*16(%esi), %xmm4 - movdqu 13*16(%esi), %xmm5 - movdqu 14*16(%esi), %xmm6 - movdqu 15*16(%esi), %xmm7 - movdqa %xmm0, 11*16(%esp) - movdqa %xmm1, 12*16(%esp) - movdqa %xmm2, 13*16(%esp) - movdqa %xmm3, 14*16(%esp) - movdqa %xmm4, 15*16(%esp) - movdqa %xmm5, 16*16(%esp) - 
movdqa %xmm6, 17*16(%esp) - movdqa %xmm7, 18*16(%esp) - jmp sha256_transform_4way_extend - - .p2align 5 -sha256_transform_4way_swap: - p2bswap_esi_esp 0 - p2bswap_esi_esp 2 - p2bswap_esi_esp 4 - p2bswap_esi_esp 6 - p2bswap_esi_esp 8 - p2bswap_esi_esp 10 - p2bswap_esi_esp 12 - p2bswap_esi_esp 14 - -sha256_transform_4way_extend: - leal 19*16(%esp), %ecx - leal 48*16(%ecx), %eax - movdqa -2*16(%ecx), %xmm3 - movdqa -1*16(%ecx), %xmm7 -sha256_transform_4way_extend_loop: - movdqa -15*16(%ecx), %xmm0 - movdqa -14*16(%ecx), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, %xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - - paddd -16*16(%ecx), %xmm0 - paddd -15*16(%ecx), %xmm4 - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - - paddd -7*16(%ecx), %xmm0 - paddd -6*16(%ecx), %xmm4 - - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, (%ecx) - movdqa %xmm7, 16(%ecx) - addl $2*16, %ecx - cmpl %ecx, %eax - jne sha256_transform_4way_extend_loop - - movdqu 0(%edi), %xmm7 - movdqu 16(%edi), %xmm5 - movdqu 32(%edi), %xmm4 - movdqu 48(%edi), %xmm3 - movdqu 64(%edi), %xmm0 - movdqu 80(%edi), %xmm1 - movdqu 96(%edi), %xmm2 - movdqu 112(%edi), %xmm6 - movdqa %xmm1, 0(%esp) - movdqa %xmm2, 16(%esp) - movdqa %xmm6, 32(%esp) - - xorl %eax, %eax 
-sha256_transform_4way_main_loop: - movdqa 3*16(%esp, %eax), %xmm6 - paddd sha256_4k(%eax), %xmm6 - paddd 32(%esp), %xmm6 - - movdqa %xmm0, %xmm1 - movdqa 16(%esp), %xmm2 - pandn %xmm2, %xmm1 - - movdqa %xmm2, 32(%esp) - movdqa 0(%esp), %xmm2 - movdqa %xmm2, 16(%esp) - - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, 0(%esp) - - paddd %xmm1, %xmm6 - - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $5, %xmm1 - pxor %xmm1, %xmm0 - paddd %xmm0, %xmm6 - - movdqa %xmm3, %xmm0 - paddd %xmm6, %xmm0 - - movdqa %xmm5, %xmm1 - movdqa %xmm4, %xmm3 - movdqa %xmm4, %xmm2 - pand %xmm5, %xmm2 - pand %xmm7, %xmm4 - pand %xmm7, %xmm1 - pxor %xmm4, %xmm1 - movdqa %xmm5, %xmm4 - movdqa %xmm7, %xmm5 - pxor %xmm2, %xmm1 - paddd %xmm1, %xmm6 - - movdqa %xmm7, %xmm2 - psrld $2, %xmm7 - movdqa %xmm7, %xmm1 - pslld $10, %xmm2 - psrld $11, %xmm1 - pxor %xmm2, %xmm7 - pxor %xmm1, %xmm7 - pslld $9, %xmm2 - psrld $9, %xmm1 - pxor %xmm2, %xmm7 - pxor %xmm1, %xmm7 - pslld $11, %xmm2 - pxor %xmm2, %xmm7 - paddd %xmm6, %xmm7 - - addl $16, %eax - cmpl $16*64, %eax - jne sha256_transform_4way_main_loop - - movdqu 0(%edi), %xmm1 - movdqu 16(%edi), %xmm2 - paddd %xmm1, %xmm7 - paddd %xmm2, %xmm5 - movdqu 32(%edi), %xmm1 - movdqu 48(%edi), %xmm2 - paddd %xmm1, %xmm4 - paddd %xmm2, %xmm3 - - movdqu %xmm7, 0(%edi) - movdqu %xmm5, 16(%edi) - movdqu %xmm4, 32(%edi) - movdqu %xmm3, 48(%edi) - - movdqu 64(%edi), %xmm1 - movdqu 80(%edi), %xmm2 - movdqu 96(%edi), %xmm6 - movdqu 112(%edi), %xmm7 - paddd %xmm1, %xmm0 - paddd 0(%esp), %xmm2 - paddd 16(%esp), %xmm6 - paddd 32(%esp), %xmm7 - - movdqu %xmm0, 64(%edi) - movdqu %xmm2, 80(%edi) - movdqu %xmm6, 96(%edi) - movdqu %xmm7, 112(%edi) - - movl %edx, %esp - popl %esi - popl %edi - ret - - - .text - .p2align 5 - .globl sha256d_ms_4way - .globl _sha256d_ms_4way -sha256d_ms_4way: 
-_sha256d_ms_4way: - pushl %edi - pushl %esi - pushl %ebp - movl 16(%esp), %edi - movl 20(%esp), %esi - movl 24(%esp), %edx - movl 28(%esp), %ecx - movl %esp, %ebp - subl $67*16, %esp - andl $-128, %esp - - leal 256(%esi), %eax - -sha256d_ms_4way_extend_loop1: - movdqa 3*16(%esi), %xmm0 - movdqa 2*16(%eax), %xmm3 - movdqa 3*16(%eax), %xmm7 - movdqa %xmm3, 5*16(%esp) - movdqa %xmm7, 6*16(%esp) - movdqa %xmm0, %xmm2 - paddd %xmm0, %xmm7 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd %xmm0, %xmm3 - movdqa %xmm3, 2*16(%eax) - movdqa %xmm7, 3*16(%eax) - - movdqa 4*16(%eax), %xmm0 - movdqa %xmm0, 7*16(%esp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - movdqa %xmm3, 4*16(%eax) - movdqa %xmm7, 5*16(%eax) - - movdqa 6*16(%eax), %xmm0 - movdqa 7*16(%eax), %xmm4 - movdqa %xmm0, 9*16(%esp) - movdqa %xmm4, 10*16(%esp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 6*16(%eax) - movdqa %xmm7, 7*16(%eax) - - movdqa 8*16(%eax), %xmm0 - movdqa 2*16(%eax), %xmm4 - movdqa %xmm0, 11*16(%esp) - 
movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 8*16(%eax) - movdqa %xmm7, 9*16(%eax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 3*16(%eax), %xmm3 - paddd 4*16(%eax), %xmm7 - movdqa %xmm3, 10*16(%eax) - movdqa %xmm7, 11*16(%eax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 5*16(%eax), %xmm3 - paddd 6*16(%eax), %xmm7 - movdqa %xmm3, 12*16(%eax) - movdqa %xmm7, 13*16(%eax) - - movdqa 14*16(%eax), %xmm0 - movdqa 15*16(%eax), %xmm4 - movdqa %xmm0, 17*16(%esp) - movdqa %xmm4, 18*16(%esp) - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 7*16(%eax), %xmm0 - paddd 8*16(%eax), %xmm4 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - 
pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 14*16(%eax) - movdqa %xmm7, 15*16(%eax) - -sha256d_ms_4way_extend_loop2: - sha256_sse2_extend_doubleround 16 - sha256_sse2_extend_doubleround 18 - sha256_sse2_extend_doubleround 20 - sha256_sse2_extend_doubleround 22 - sha256_sse2_extend_doubleround 24 - sha256_sse2_extend_doubleround 26 - sha256_sse2_extend_doubleround 28 - sha256_sse2_extend_doubleround 30 - sha256_sse2_extend_doubleround 32 - sha256_sse2_extend_doubleround 34 - sha256_sse2_extend_doubleround 36 - sha256_sse2_extend_doubleround 38 - sha256_sse2_extend_doubleround 40 - sha256_sse2_extend_doubleround 42 - jz sha256d_ms_4way_extend_coda2 - sha256_sse2_extend_doubleround 44 - sha256_sse2_extend_doubleround 46 - - movdqa 0(%ecx), %xmm3 - movdqa 16(%ecx), %xmm0 - movdqa 32(%ecx), %xmm1 - movdqa 48(%ecx), %xmm2 - movdqa 64(%ecx), %xmm6 - movdqa 80(%ecx), %xmm7 - movdqa 96(%ecx), %xmm5 - movdqa 112(%ecx), %xmm4 - movdqa %xmm1, 0(%esp) - movdqa %xmm2, 16(%esp) - movdqa %xmm6, 32(%esp) - - movl %esi, %eax - jmp sha256d_ms_4way_main_loop1 - -sha256d_ms_4way_main_loop2: - sha256_sse2_main_round 0 - sha256_sse2_main_round 1 - sha256_sse2_main_round 2 -sha256d_ms_4way_main_loop1: - sha256_sse2_main_round 3 - sha256_sse2_main_quadround 4 - sha256_sse2_main_quadround 8 - sha256_sse2_main_quadround 12 - sha256_sse2_main_quadround 16 - sha256_sse2_main_quadround 20 - sha256_sse2_main_quadround 24 - sha256_sse2_main_quadround 28 - sha256_sse2_main_quadround 32 - sha256_sse2_main_quadround 36 - sha256_sse2_main_quadround 40 - sha256_sse2_main_quadround 44 - sha256_sse2_main_quadround 48 - sha256_sse2_main_quadround 52 - sha256_sse2_main_round 56 - jz sha256d_ms_4way_finish - sha256_sse2_main_round 57 - sha256_sse2_main_round 58 - sha256_sse2_main_round 
59 - sha256_sse2_main_quadround 60 - - movdqa 5*16(%esp), %xmm1 - movdqa 6*16(%esp), %xmm2 - movdqa 7*16(%esp), %xmm6 - movdqa %xmm1, 18*16(%esi) - movdqa %xmm2, 19*16(%esi) - movdqa %xmm6, 20*16(%esi) - movdqa 9*16(%esp), %xmm1 - movdqa 10*16(%esp), %xmm2 - movdqa 11*16(%esp), %xmm6 - movdqa %xmm1, 22*16(%esi) - movdqa %xmm2, 23*16(%esi) - movdqa %xmm6, 24*16(%esi) - movdqa 17*16(%esp), %xmm1 - movdqa 18*16(%esp), %xmm2 - movdqa %xmm1, 30*16(%esi) - movdqa %xmm2, 31*16(%esi) - - movdqa 0(%esp), %xmm1 - movdqa 16(%esp), %xmm2 - movdqa 32(%esp), %xmm6 - paddd 0(%edx), %xmm7 - paddd 16(%edx), %xmm5 - paddd 32(%edx), %xmm4 - paddd 48(%edx), %xmm3 - paddd 64(%edx), %xmm0 - paddd 80(%edx), %xmm1 - paddd 96(%edx), %xmm2 - paddd 112(%edx), %xmm6 - - movdqa %xmm7, 48+0(%esp) - movdqa %xmm5, 48+16(%esp) - movdqa %xmm4, 48+32(%esp) - movdqa %xmm3, 48+48(%esp) - movdqa %xmm0, 48+64(%esp) - movdqa %xmm1, 48+80(%esp) - movdqa %xmm2, 48+96(%esp) - movdqa %xmm6, 48+112(%esp) - - movdqa sha256d_4preext2_15, %xmm1 - movdqa sha256d_4preext2_24, %xmm2 - pxor %xmm0, %xmm0 - movdqa %xmm2, 48+128(%esp) - movdqa %xmm0, 48+144(%esp) - movdqa %xmm0, 48+160(%esp) - movdqa %xmm0, 48+176(%esp) - movdqa %xmm0, 48+192(%esp) - movdqa %xmm0, 48+208(%esp) - movdqa %xmm0, 48+224(%esp) - movdqa %xmm1, 48+240(%esp) - - leal 19*16(%esp), %eax - cmpl %eax, %eax - - movdqa -15*16(%eax), %xmm0 - movdqa -14*16(%eax), %xmm4 - movdqa %xmm0, %xmm2 - movdqa %xmm4, %xmm6 - psrld $3, %xmm0 - psrld $3, %xmm4 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm5 - pslld $14, %xmm2 - pslld $14, %xmm6 - psrld $4, %xmm1 - psrld $4, %xmm5 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - psrld $11, %xmm1 - psrld $11, %xmm5 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - pslld $11, %xmm2 - pslld $11, %xmm6 - pxor %xmm1, %xmm0 - pxor %xmm5, %xmm4 - pxor %xmm2, %xmm0 - pxor %xmm6, %xmm4 - paddd -16*16(%eax), %xmm0 - paddd -15*16(%eax), %xmm4 - paddd sha256d_4preext2_17, %xmm4 - movdqa %xmm0, %xmm3 - movdqa %xmm4, %xmm7 - movdqa %xmm3, 
0*16(%eax) - movdqa %xmm7, 1*16(%eax) - - sha256_sse2_extend_doubleround 2 - sha256_sse2_extend_doubleround 4 - - movdqa -9*16(%eax), %xmm0 - movdqa sha256d_4preext2_23, %xmm4 - movdqa %xmm0, %xmm2 - psrld $3, %xmm0 - movdqa %xmm0, %xmm1 - pslld $14, %xmm2 - psrld $4, %xmm1 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - psrld $11, %xmm1 - pslld $11, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - paddd -10*16(%eax), %xmm0 - paddd -9*16(%eax), %xmm4 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd -1*16(%eax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - paddd 0*16(%eax), %xmm4 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 6*16(%eax) - movdqa %xmm7, 7*16(%eax) - - movdqa sha256d_4preext2_24, %xmm0 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 1*16(%eax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd 2*16(%eax), %xmm7 - movdqa %xmm3, 8*16(%eax) - movdqa %xmm7, 9*16(%eax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 
- pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 3*16(%eax), %xmm3 - paddd 4*16(%eax), %xmm7 - movdqa %xmm3, 10*16(%eax) - movdqa %xmm7, 11*16(%eax) - - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd 5*16(%eax), %xmm3 - paddd 6*16(%eax), %xmm7 - movdqa %xmm3, 12*16(%eax) - movdqa %xmm7, 13*16(%eax) - - movdqa sha256d_4preext2_30, %xmm0 - movdqa 0*16(%eax), %xmm4 - movdqa %xmm4, %xmm6 - psrld $3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $14, %xmm6 - psrld $4, %xmm5 - pxor %xmm5, %xmm4 - pxor %xmm6, %xmm4 - psrld $11, %xmm5 - pslld $11, %xmm6 - pxor %xmm5, %xmm4 - pxor %xmm6, %xmm4 - paddd -1*16(%eax), %xmm4 - movdqa %xmm3, %xmm2 - movdqa %xmm7, %xmm6 - psrld $10, %xmm3 - psrld $10, %xmm7 - movdqa %xmm3, %xmm1 - movdqa %xmm7, %xmm5 - paddd 7*16(%eax), %xmm0 - pslld $13, %xmm2 - pslld $13, %xmm6 - psrld $7, %xmm1 - psrld $7, %xmm5 - paddd 8*16(%eax), %xmm4 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - psrld $2, %xmm1 - psrld $2, %xmm5 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - pslld $2, %xmm2 - pslld $2, %xmm6 - pxor %xmm1, %xmm3 - pxor %xmm5, %xmm7 - pxor %xmm2, %xmm3 - pxor %xmm6, %xmm7 - paddd %xmm0, %xmm3 - paddd %xmm4, %xmm7 - movdqa %xmm3, 14*16(%eax) - movdqa %xmm7, 15*16(%eax) - - jmp sha256d_ms_4way_extend_loop2 - -sha256d_ms_4way_extend_coda2: - sha256_sse2_extend_round 44 - - movdqa sha256_4h+0, %xmm7 - movdqa sha256_4h+16, %xmm5 - movdqa sha256_4h+32, %xmm4 - movdqa sha256_4h+48, %xmm3 - movdqa sha256_4h+64, %xmm0 - movdqa sha256_4h+80, %xmm1 - movdqa sha256_4h+96, %xmm2 - movdqa sha256_4h+112, %xmm6 - movdqa %xmm1, 0(%esp) - movdqa %xmm2, 16(%esp) - movdqa %xmm6, 
32(%esp) - - leal 48(%esp), %eax - jmp sha256d_ms_4way_main_loop2 - -.macro sha256_sse2_main_round_red i, r7 - movdqa 16*(\i)(%eax), %xmm6 - paddd 16*(\i)+sha256_4k, %xmm6 - paddd 32(%esp), %xmm6 - movdqa %xmm0, %xmm1 - movdqa 16(%esp), %xmm2 - paddd \r7, %xmm6 - pandn %xmm2, %xmm1 - movdqa %xmm2, 32(%esp) - movdqa 0(%esp), %xmm2 - movdqa %xmm2, 16(%esp) - pand %xmm0, %xmm2 - pxor %xmm2, %xmm1 - movdqa %xmm0, 0(%esp) - paddd %xmm1, %xmm6 - movdqa %xmm0, %xmm1 - psrld $6, %xmm0 - movdqa %xmm0, %xmm2 - pslld $7, %xmm1 - psrld $5, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $14, %xmm1 - psrld $14, %xmm2 - pxor %xmm1, %xmm0 - pxor %xmm2, %xmm0 - pslld $5, %xmm1 - pxor %xmm1, %xmm0 - paddd %xmm6, %xmm0 -.endm - -sha256d_ms_4way_finish: - sha256_sse2_main_round_red 57, %xmm3 - sha256_sse2_main_round_red 58, %xmm4 - sha256_sse2_main_round_red 59, %xmm5 - sha256_sse2_main_round_red 60, %xmm7 - - paddd sha256_4h+112, %xmm0 - movdqa %xmm0, 112(%edi) - - movl %ebp, %esp - popl %ebp - popl %esi - popl %edi - ret - - - .text - .p2align 5 - .globl sha256_use_4way - .globl _sha256_use_4way -sha256_use_4way: -_sha256_use_4way: - pushl %ebx - - /* Check for SSE2 availability */ - movl $1, %eax - cpuid - andl $0x04000000, %edx - jnz sha256_use_4way_sse2 - xorl %eax, %eax - popl %ebx - ret - -sha256_use_4way_sse2: - movl $1, %eax - popl %ebx - ret - -#endif diff --git a/configure b/configure index 961c5e0c..95a00e24 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.7. +# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.8. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -608,8 +608,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='23.7' -PACKAGE_STRING='cpuminer-opt 23.7' +PACKAGE_VERSION='23.8' +PACKAGE_STRING='cpuminer-opt 23.8' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 23.7 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 23.8 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1432,7 +1432,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 23.7:";; + short | recursive ) echo "Configuration of cpuminer-opt 23.8:";; esac cat <<\_ACEOF @@ -1538,7 +1538,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 23.7 +cpuminer-opt configure 23.8 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 23.7, which was +It was created by cpuminer-opt $as_me 23.8, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3593,7 +3593,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='23.7' + VERSION='23.8' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 23.7, which was +This file was extended by cpuminer-opt $as_me 23.8, which was generated by GNU Autoconf 2.71. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -cpuminer-opt config.status 23.7 +cpuminer-opt config.status 23.8 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index a9da908a..caefba48 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [23.7]) +AC_INIT([cpuminer-opt], [23.8]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/configure~ b/configure~ index 62e4f599..38643133 100755 --- a/configure~ +++ b/configure~ @@ -1,9 +1,10 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.7. +# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.8. # # -# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Inc. # # # This configure script is free software; the Free Software Foundation @@ -14,14 +15,16 @@ # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -31,46 +34,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. 
+# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). 
+for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -79,13 +82,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -94,8 +90,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -107,30 +107,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. 
-for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. @@ -152,20 +132,22 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -as_fn_exit 255 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else +else \$as_nop case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( @@ -185,42 +167,53 @@ as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : -else +else \$as_nop exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" - if (eval "$as_required") 2>/dev/null; then : + if (eval "$as_required") 2>/dev/null +then : as_have_required=yes -else +else $as_nop as_have_required=no fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : -else +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
- as_shell=$as_dir/$as_base + as_shell=$as_dir$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : break 2 fi fi @@ -228,14 +221,21 @@ fi esac as_found=false done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi - if test "x$CONFIG_SHELL" != x; then : + if test "x$CONFIG_SHELL" != x +then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also @@ -253,18 +253,19 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." 
+ if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." else - $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." @@ -291,6 +292,7 @@ as_fn_unset () } as_unset=as_fn_unset + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -308,6 +310,14 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -322,7 +332,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -331,7 +341,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -370,12 +380,13 @@ as_fn_executable_p () # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -387,18 +398,27 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -410,9 +430,9 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -439,7 +459,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -483,7 +503,7 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall @@ -497,6 +517,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits exit } + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -510,6 +534,13 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -577,48 +608,44 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='23.7' -PACKAGE_STRING='cpuminer-opt 23.7' +PACKAGE_VERSION='23.8' +PACKAGE_STRING='cpuminer-opt 23.8' PACKAGE_BUGREPORT='' PACKAGE_URL='' ac_unique_file="cpu-miner.c" # Factoring default headers for most tests. 
ac_includes_default="\ -#include -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#ifdef HAVE_SYS_STAT_H -# include +#include +#ifdef HAVE_STDIO_H +# include #endif -#ifdef STDC_HEADERS +#ifdef HAVE_STDLIB_H # include -# include -#else -# ifdef HAVE_STDLIB_H -# include -# endif #endif #ifdef HAVE_STRING_H -# if !defined STDC_HEADERS && defined HAVE_MEMORY_H -# include -# endif # include #endif -#ifdef HAVE_STRINGS_H -# include -#endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif #ifdef HAVE_UNISTD_H # include #endif" +ac_header_c_list= ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS @@ -630,6 +657,8 @@ JANSSON_LIBS LIBCURL_CPPFLAGS LIBCURL_CFLAGS LIBCURL +HAVE_MACOS_FALSE +HAVE_MACOS_TRUE MINGW_FALSE MINGW_TRUE ARCH_ARM_FALSE @@ -683,6 +712,9 @@ AM_BACKSLASH AM_DEFAULT_VERBOSITY AM_DEFAULT_V AM_V +CSCOPE +ETAGS +CTAGS am__untar am__tar AMTAR @@ -850,8 +882,6 @@ do *) ac_optarg=yes ;; esac - # Accept the important Cygnus configure options, so we can diagnose typos. - case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; @@ -892,9 +922,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -918,9 +948,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? 
"invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -1131,9 +1161,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1147,9 +1177,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1193,9 +1223,9 @@ Try \`$0 --help' for more information" *) # FIXME: should be removed in autoconf 3.0. - $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; @@ -1211,7 +1241,7 @@ if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? 
"unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi @@ -1275,7 +1305,7 @@ $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | +printf "%s\n" X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -1332,7 +1362,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 23.7 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 23.8 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1434,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 23.7:";; + short | recursive ) echo "Configuration of cpuminer-opt 23.8:";; esac cat <<\_ACEOF @@ -1462,9 +1492,9 @@ if test "$ac_init_help" = "recursive"; then case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -1492,7 +1522,8 @@ esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. 
+ # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive @@ -1500,7 +1531,7 @@ ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix echo && $SHELL "$ac_srcdir/configure" --help=recursive else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done @@ -1509,10 +1540,10 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 23.7 -generated by GNU Autoconf 2.69 +cpuminer-opt configure 23.8 +generated by GNU Autoconf 2.71 -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF @@ -1529,14 +1560,14 @@ fi ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext + rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1544,14 +1575,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then : + } && test -s conftest.$ac_objext +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1573,7 +1605,7 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1581,14 +1613,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1604,14 +1637,14 @@ fi ac_fn_cxx_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext + rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1619,14 +1652,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then : + } && test -s conftest.$ac_objext +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1636,135 +1670,6 @@ fi } # ac_fn_cxx_try_compile -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes -# that executables *can* be run. -ac_fn_c_try_run () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - ac_retval=0 -else - $as_echo "$as_me: program exited with status $ac_status" >&5 - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=$ac_status -fi - rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_run - -# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists, giving a warning if it cannot be compiled using -# the include files in INCLUDES and setting the cache variable VAR -# accordingly. 
-ac_fn_c_check_header_mongrel () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if eval \${$3+:} false; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -else - # Is the header compilable? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 -$as_echo_n "checking $2 usability... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_header_compiler=yes -else - ac_header_compiler=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 -$as_echo "$ac_header_compiler" >&6; } - -# Is the header present? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 -$as_echo_n "checking $2 presence... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include <$2> -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - ac_header_preproc=yes -else - ac_header_preproc=no -fi -rm -f conftest.err conftest.i conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 -$as_echo "$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( - yes:no: ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 -$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" 
>&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; - no:yes:* ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 -$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 -$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 -$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 -$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; -esac - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - eval "$3=\$ac_header_compiler" -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_mongrel - # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in @@ -1772,49 +1677,54 @@ fi ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... 
" >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : eval "$3=yes" -else +else $as_nop eval "$3=no" fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile -# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES -# --------------------------------------------- +# ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR +# ------------------------------------------------------------------ # Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR -# accordingly. -ac_fn_c_check_decl () +# accordingly. Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. +ac_fn_check_decl () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack as_decl_name=`echo $2|sed 's/ *(.*//'` + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +printf %s "checking whether $as_decl_name is declared... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 -$as_echo_n "checking whether $as_decl_name is declared... 
" >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else + eval ac_save_FLAGS=\$$6 + as_fn_append $6 " $5" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int -main () +main (void) { #ifndef $as_decl_name #ifdef __cplusplus @@ -1828,19 +1738,22 @@ main () return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : eval "$3=yes" -else +else $as_nop eval "$3=no" fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + eval $6=\$ac_save_FLAGS + fi eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno -} # ac_fn_c_check_decl +} # ac_fn_check_decl # ac_fn_c_check_type LINENO TYPE VAR INCLUDES # ------------------------------------------- @@ -1849,17 +1762,18 @@ $as_echo "$ac_res" >&6; } ac_fn_c_check_type () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop eval "$3=no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int -main () +main (void) { if (sizeof ($2)) return 0; @@ -1867,12 +1781,13 @@ if (sizeof ($2)) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ $4 int -main () +main (void) { if (sizeof (($2))) return 0; @@ -1880,18 +1795,19 @@ if (sizeof (($2))) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -else +else $as_nop eval "$3=yes" fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_type @@ -1902,14 +1818,14 @@ $as_echo "$ac_res" >&6; } ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1917,17 +1833,18 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1942,17 +1859,61 @@ fi } # ac_fn_c_try_link +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that +# executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: program exited with status $ac_status" >&5 + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + # ac_fn_c_check_func LINENO FUNC VAR # ---------------------------------- # Tests whether FUNC exists, setting the cache variable VAR accordingly ac_fn_c_check_func () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case declares $2. @@ -1960,16 +1921,9 @@ else #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. */ - -#ifdef __STDC__ -# include -#else -# include -#endif + which can conflict with char $2 (); below. */ +#include #undef $2 /* Override any GCC internal prototype to avoid an error. 
@@ -1987,35 +1941,56 @@ choke me #endif int -main () +main (void) { return $2 (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : eval "$3=yes" -else +else $as_nop eval "$3=no" fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_func +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 23.7, which was -generated by GNU Autoconf 2.69. Invocation command line was +It was created by cpuminer-opt $as_me 23.8, which was +generated by GNU Autoconf 2.71. Invocation command line was - $ $0 $@ + $ $0$ac_configure_args_raw _ACEOF exec 5>>config.log @@ -2048,8 +2023,12 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- $as_echo "PATH: $as_dir" + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" done IFS=$as_save_IFS @@ -2084,7 +2063,7 @@ do | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; @@ -2119,11 +2098,13 @@ done # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" # Save into config.log some information that might help in debugging. { echo - $as_echo "## ---------------- ## + printf "%s\n" "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo @@ -2134,8 +2115,8 @@ trap 'exit_status=$? case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -2159,7 +2140,7 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; ) echo - $as_echo "## ----------------- ## + printf "%s\n" "## ----------------- ## ## Output variables. 
## ## ----------------- ##" echo @@ -2167,14 +2148,14 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## + printf "%s\n" "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo @@ -2182,15 +2163,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then - $as_echo "## ----------- ## + printf "%s\n" "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo @@ -2198,8 +2179,8 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; echo fi test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && @@ -2213,63 +2194,48 @@ ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h -$as_echo "/* confdefs.h */" > confdefs.h +printf "%s\n" "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. 
-cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac + ac_site_files="$CONFIG_SITE" elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" + +for ac_site_file in $ac_site_files do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi @@ -2279,138 +2245,745 @@ if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. 
if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} >$cache_file fi -# Check that the precious variables saved in the cache have kept the same -# value. -ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do - eval ac_old_set=\$ac_cv_env_${ac_var}_set - eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value - case $ac_old_set,$ac_new_set in - set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,);; - *) - if test "x$ac_old_val" != "x$ac_new_val"; then - # differences in whitespace do not lead to failure. 
- ac_old_val_w=`echo x $ac_old_val` - ac_new_val_w=`echo x $ac_new_val` - if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} - ac_cache_corrupted=: - else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} - eval $ac_var=\$ac_old_val - fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} - fi;; - esac - # Pass precious variables to config.status. - if test "$ac_new_set" = set; then - case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; - *) ac_arg=$ac_var=$ac_new_val ;; - esac - case " $ac_configure_args " in - *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. - *) as_fn_append ac_configure_args " '$ac_arg'" ;; - esac - fi -done -if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 -fi -## -------------------- ## -## Main body of script. ## -## -------------------- ## +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? 
+ Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). 
+ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +// Does the compiler advertise C99 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. +#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. 
+static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +// Does the compiler advertise C11 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. 
+char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! +char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. +struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). 
+ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). +ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + +# Test code for whether the C++ compiler supports C++98 (global declarations) +ac_cxx_conftest_cxx98_globals=' +// Does the compiler advertise C++98 conformance? +#if !defined __cplusplus || __cplusplus < 199711L +# error "Compiler does not advertise C++98 conformance" +#endif + +// These inclusions are to reject old compilers that +// lack the unsuffixed header files. +#include +#include + +// and are *not* freestanding headers in C++98. +extern void assert (int); +namespace std { + extern int strcmp (const char *, const char *); +} + +// Namespaces, exceptions, and templates were all added after "C++ 2.0". +using std::exception; +using std::strcmp; + +namespace { + +void test_exception_syntax() +{ + try { + throw "test"; + } catch (const char *s) { + // Extra parentheses suppress a warning when building autoconf itself, + // due to lint rules shared with more typical C programs. + assert (!(strcmp) (s, "test")); + } +} + +template struct test_template +{ + T const val; + explicit test_template(T t) : val(t) {} + template T add(U u) { return static_cast(u) + val; } +}; + +} // anonymous namespace +' + +# Test code for whether the C++ compiler supports C++98 (body of main) +ac_cxx_conftest_cxx98_main=' + assert (argc); + assert (! argv[0]); +{ + test_exception_syntax (); + test_template tt (2.0); + assert (tt.add (4) == 6.0); + assert (true && !false); +} +' + +# Test code for whether the C++ compiler supports C++11 (global declarations) +ac_cxx_conftest_cxx11_globals=' +// Does the compiler advertise C++ 2011 conformance? 
+#if !defined __cplusplus || __cplusplus < 201103L +# error "Compiler does not advertise C++11 conformance" +#endif + +namespace cxx11test +{ + constexpr int get_val() { return 20; } + + struct testinit + { + int i; + double d; + }; + class delegate + { + public: + delegate(int n) : n(n) {} + delegate(): delegate(2354) {} + + virtual int getval() { return this->n; }; + protected: + int n; + }; + + class overridden : public delegate + { + public: + overridden(int n): delegate(n) {} + virtual int getval() override final { return this->n * 2; } + }; + + class nocopy + { + public: + nocopy(int i): i(i) {} + nocopy() = default; + nocopy(const nocopy&) = delete; + nocopy & operator=(const nocopy&) = delete; + private: + int i; + }; + + // for testing lambda expressions + template Ret eval(Fn f, Ret v) + { + return f(v); + } + + // for testing variadic templates and trailing return types + template auto sum(V first) -> V + { + return first; + } + template auto sum(V first, Args... rest) -> V + { + return first + sum(rest...); + } +} +' + +# Test code for whether the C++ compiler supports C++11 (body of main) +ac_cxx_conftest_cxx11_main=' +{ + // Test auto and decltype + auto a1 = 6538; + auto a2 = 48573953.4; + auto a3 = "String literal"; + + int total = 0; + for (auto i = a3; *i; ++i) { total += *i; } + + decltype(a2) a4 = 34895.034; +} +{ + // Test constexpr + short sa[cxx11test::get_val()] = { 0 }; +} +{ + // Test initializer lists + cxx11test::testinit il = { 4323, 435234.23544 }; +} +{ + // Test range-based for + int array[] = {9, 7, 13, 15, 4, 18, 12, 10, 5, 3, + 14, 19, 17, 8, 6, 20, 16, 2, 11, 1}; + for (auto &x : array) { x += 23; } +} +{ + // Test lambda expressions + using cxx11test::eval; + assert (eval ([](int x) { return x*2; }, 21) == 42); + double d = 2.0; + assert (eval ([&](double x) { return d += x; }, 3.0) == 5.0); + assert (d == 5.0); + assert (eval ([=](double x) mutable { return d += x; }, 4.0) == 9.0); + assert (d == 5.0); +} +{ + // Test use of 
variadic templates + using cxx11test::sum; + auto a = sum(1); + auto b = sum(1, 2); + auto c = sum(1.0, 2.0, 3.0); +} +{ + // Test constructor delegation + cxx11test::delegate d1; + cxx11test::delegate d2(); + cxx11test::delegate d3(45); +} +{ + // Test override and final + cxx11test::overridden o1(55464); +} +{ + // Test nullptr + char *c = nullptr; +} +{ + // Test template brackets + test_template<::test_template> v(test_template(12)); +} +{ + // Unicode literals + char const *utf8 = u8"UTF-8 string \u2500"; + char16_t const *utf16 = u"UTF-8 string \u2500"; + char32_t const *utf32 = U"UTF-32 string \u2500"; +} +' + +# Test code for whether the C compiler supports C++11 (complete). +ac_cxx_conftest_cxx11_program="${ac_cxx_conftest_cxx98_globals} +${ac_cxx_conftest_cxx11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_cxx_conftest_cxx98_main} + ${ac_cxx_conftest_cxx11_main} + return ok; +} +" +# Test code for whether the C compiler supports C++98 (complete). +ac_cxx_conftest_cxx98_program="${ac_cxx_conftest_cxx98_globals} +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_cxx_conftest_cxx98_main} + return ok; +} +" +as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" +as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" +as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" +as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" +as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" +as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" +as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" +as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" +as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" + +# Auxiliary files required by this configure script. +ac_aux_files="compile missing install-sh config.guess config.sub" + +# Locations in which to look for auxiliary files. 
+ac_aux_dir_candidates="${srcdir}${PATH_SEPARATOR}${srcdir}/..${PATH_SEPARATOR}${srcdir}/../.." + +# Search for a directory containing all of the required auxiliary files, +# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. +# If we don't find one directory that contains all the files we need, +# we report the set of missing files from the *first* directory in +# $ac_aux_dir_candidates and give up. +ac_missing_aux_files="" +ac_first_candidate=: +printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in $ac_aux_dir_candidates +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: -ac_aux_dir= -for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 + ac_aux_dir_found=yes + ac_install_sh= + for ac_aux in $ac_aux_files + do + # As a special case, if "install-sh" is required, that requirement + # can be satisfied by any of "install-sh", "install.sh", or "shtool", + # and $ac_install_sh is set appropriately for whichever one is found. 
+ if test x"$ac_aux" = x"install-sh" + then + if test -f "${as_dir}install-sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 + ac_install_sh="${as_dir}install-sh -c" + elif test -f "${as_dir}install.sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 + ac_install_sh="${as_dir}install.sh -c" + elif test -f "${as_dir}shtool"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 + ac_install_sh="${as_dir}shtool install -c" + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} install-sh" + else + break + fi + fi + else + if test -f "${as_dir}${ac_aux}"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" + else + break + fi + fi + fi + done + if test "$ac_aux_dir_found" = yes; then + ac_aux_dir="$as_dir" break fi + ac_first_candidate=false + + as_found=false done -if test -z "$ac_aux_dir"; then - as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 fi + # These three variables are undocumented and unsupported, # and are intended to be withdrawn in a future Autoconf release. # They can cause serious problems if a builder's source tree is in a directory # whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
+if test -f "${ac_aux_dir}config.guess"; then + ac_config_guess="$SHELL ${ac_aux_dir}config.guess" +fi +if test -f "${ac_aux_dir}config.sub"; then + ac_config_sub="$SHELL ${ac_aux_dir}config.sub" +fi +if test -f "$ac_aux_dir/configure"; then + ac_configure="$SHELL ${ac_aux_dir}configure" +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. 
## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu -# Make sure we can run config.sub. -$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 -$as_echo_n "checking build system type... " >&6; } -if ${ac_cv_build+:} false; then : - $as_echo_n "(cached) " >&6 -else + + + + + # Make sure we can run config.sub. +$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +printf %s "checking build system type... " >&6; } +if test ${ac_cv_build+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_build_alias=$build_alias test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` + ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` test "x$ac_build_alias" = x && as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 -ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 +ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 -$as_echo "$ac_cv_build" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +printf "%s\n" "$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; *) as_fn_error $? 
"invalid value of canonical build" "$LINENO" 5;; @@ -2429,21 +3002,22 @@ IFS=$ac_save_IFS case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 -$as_echo_n "checking host system type... " >&6; } -if ${ac_cv_host+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +printf %s "checking host system type... " >&6; } +if test ${ac_cv_host+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else - ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 + ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 -$as_echo "$ac_cv_host" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +printf "%s\n" "$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; *) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; @@ -2462,21 +3036,22 @@ IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 -$as_echo_n "checking target system type... " >&6; } -if ${ac_cv_target+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 +printf %s "checking target system type... " >&6; } +if test ${ac_cv_target+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test "x$target_alias" = x; then ac_cv_target=$ac_cv_host else - ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || - as_fn_error $? 
"$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 + ac_cv_target=`$SHELL "${ac_aux_dir}config.sub" $target_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $target_alias failed" "$LINENO" 5 fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 -$as_echo "$ac_cv_target" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 +printf "%s\n" "$ac_cv_target" >&6; } case $ac_cv_target in *-*-*) ;; *) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;; @@ -2509,7 +3084,8 @@ ac_config_headers="$ac_config_headers cpuminer-config.h" am__api_version='1.16' -# Find a good install program. We prefer a C program (faster), + + # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install @@ -2523,20 +3099,25 @@ am__api_version='1.16' # OS/2's system install, which has a completely different semantic # ./install, which can be erroneously created by make from ./install.sh. # Reject install programs that cannot install multiple files. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 -$as_echo_n "checking for a BSD-compatible install... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +printf %s "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then -if ${ac_cv_path_install+:} false; then : - $as_echo_n "(cached) " >&6 -else +if test ${ac_cv_path_install+y} +then : + printf %s "(cached) " >&6 +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - # Account for people who put trailing slashes in PATH elements. 
-case $as_dir/ in #(( - ./ | .// | /[cC]/* | \ + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + # Account for fact that we put trailing slashes in our PATH walk. +case $as_dir in #(( + ./ | /[cC]/* | \ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ /usr/ucb/* ) ;; @@ -2546,13 +3127,13 @@ case $as_dir/ in #(( # by default. for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then if test $ac_prog = install && - grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : elif test $ac_prog = install && - grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # program-specific install script used by HP pwplus--don't use. : else @@ -2560,12 +3141,12 @@ case $as_dir/ in #(( echo one > conftest.one echo two > conftest.two mkdir conftest.dir - if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir/" && test -s conftest.one && test -s conftest.two && test -s conftest.dir/conftest.one && test -s conftest.dir/conftest.two then - ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c" break 3 fi fi @@ -2581,7 +3162,7 @@ IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir fi - if test "${ac_cv_path_install+set}" = set; then + if test ${ac_cv_path_install+y}; then INSTALL=$ac_cv_path_install else # As a last resort, use the slow shell script. 
Don't cache a @@ -2591,8 +3172,8 @@ fi INSTALL=$ac_install_sh fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 -$as_echo "$INSTALL" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +printf "%s\n" "$INSTALL" >&6; } # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. @@ -2602,8 +3183,8 @@ test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 -$as_echo_n "checking whether build environment is sane... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +printf %s "checking whether build environment is sane... " >&6; } # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' @@ -2657,8 +3238,8 @@ else as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= @@ -2677,26 +3258,23 @@ test "$program_suffix" != NONE && # Double any \ or $. # By default was `s,x,x', remove it if useless. ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' -program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` +program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` + # Expand $ac_aux_dir to an absolute path. 
am_aux_dir=`cd "$ac_aux_dir" && pwd` -if test x"${MISSING+set}" != xset; then - case $am_aux_dir in - *\ * | *\ *) - MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; - *) - MISSING="\${SHELL} $am_aux_dir/missing" ;; - esac + + if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then am_missing_run="$MISSING " else am_missing_run= - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 -$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi if test x"${install_sh+set}" != xset; then @@ -2716,11 +3294,12 @@ if test "$cross_compiling" != no; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_STRIP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else @@ -2728,11 +3307,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2743,11 +3326,11 @@ fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 -$as_echo "$STRIP" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2756,11 +3339,12 @@ if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_STRIP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else @@ -2768,11 +3352,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2783,11 +3371,11 @@ fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 -$as_echo "$ac_ct_STRIP" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then @@ -2795,8 +3383,8 @@ fi else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP @@ -2808,25 +3396,31 @@ fi fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 -$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir -p" >&5 +printf %s "checking for a race-free mkdir -p... 
" >&6; } if test -z "$MKDIR_P"; then - if ${ac_cv_path_mkdir+:} false; then : - $as_echo_n "(cached) " >&6 -else + if test ${ac_cv_path_mkdir+y} +then : + printf %s "(cached) " >&6 +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_prog in mkdir gmkdir; do for ac_exec_ext in '' $ac_executable_extensions; do - as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue - case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( - 'mkdir (GNU coreutils) '* | \ - 'mkdir (coreutils) '* | \ + as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue + case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir ('*'coreutils) '* | \ + 'BusyBox '* | \ 'mkdir (fileutils) '4.1*) - ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext break 3;; esac done @@ -2837,7 +3431,7 @@ IFS=$as_save_IFS fi test -d ./--version && rmdir ./--version - if test "${ac_cv_path_mkdir+set}" = set; then + if test ${ac_cv_path_mkdir+y}; then MKDIR_P="$ac_cv_path_mkdir -p" else # As a last resort, use the slow shell script. Don't cache a @@ -2847,18 +3441,19 @@ fi MKDIR_P="$ac_install_sh -d" fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 -$as_echo "$MKDIR_P" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +printf "%s\n" "$MKDIR_P" >&6; } for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_AWK+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_AWK+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. else @@ -2866,11 +3461,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2881,24 +3480,25 @@ fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 -$as_echo "$AWK" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +printf "%s\n" "$AWK" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi test -n "$AWK" && break done -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 -$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +printf %s "checking whether ${MAKE-make} sets \$(MAKE)... 
" >&6; } set x ${MAKE-make} -ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` -if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : - $as_echo_n "(cached) " >&6 -else +ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval test \${ac_cv_prog_make_${ac_make}_set+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @@ -2914,12 +3514,12 @@ esac rm -f conftest.make fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } SET_MAKE= else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } SET_MAKE="MAKE=${MAKE-make}" fi @@ -2933,7 +3533,8 @@ fi rmdir .tst 2>/dev/null # Check whether --enable-silent-rules was given. -if test "${enable_silent_rules+set}" = set; then : +if test ${enable_silent_rules+y} +then : enableval=$enable_silent_rules; fi @@ -2943,12 +3544,13 @@ case $enable_silent_rules in # ((( *) AM_DEFAULT_VERBOSITY=1;; esac am_make=${MAKE-make} -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 -$as_echo_n "checking whether $am_make supports nested variables... " >&6; } -if ${am_cv_make_support_nested_variables+:} false; then : - $as_echo_n "(cached) " >&6 -else - if $as_echo 'TRUE=$(BAR$(V)) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... 
" >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 @@ -2960,8 +3562,8 @@ else am_cv_make_support_nested_variables=no fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 -$as_echo "$am_cv_make_support_nested_variables" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } if test $am_cv_make_support_nested_variables = yes; then AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' @@ -2993,17 +3595,13 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='23.7' + VERSION='23.8' -cat >>confdefs.h <<_ACEOF -#define PACKAGE "$PACKAGE" -_ACEOF +printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define VERSION "$VERSION" -_ACEOF +printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h # Some tools Automake needs. @@ -3043,6 +3641,20 @@ am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi + +if test -z "$ETAGS"; then + ETAGS=etags +fi + +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi + + # POSIX will say in a future version that running "rm -f" with no argument # is OK; and we want to be able to make that assumption in our Makefile @@ -3088,17 +3700,18 @@ fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 -$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +printf %s "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } # Check whether --enable-maintainer-mode was given. 
-if test "${enable_maintainer_mode+set}" = set; then : +if test ${enable_maintainer_mode+y} +then : enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval -else +else $as_nop USE_MAINTAINER_MODE=no fi - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 -$as_echo "$USE_MAINTAINER_MODE" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +printf "%s\n" "$USE_MAINTAINER_MODE" >&6; } if test $USE_MAINTAINER_MODE = yes; then MAINTAINER_MODE_TRUE= MAINTAINER_MODE_FALSE='#' @@ -3111,12 +3724,21 @@ fi + + + + + + + + + DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 -$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +printf %s "checking whether ${MAKE-make} supports the include directive... " >&6; } cat > confinc.mk << 'END' am__doit: @echo this is the am__doit target >confinc.out @@ -3152,11 +3774,12 @@ esac fi done rm -f confinc.* confmf.* -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 -$as_echo "${_am_result}" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +printf "%s\n" "${_am_result}" >&6; } # Check whether --enable-dependency-tracking was given. -if test "${enable_dependency_tracking+set}" = set; then : +if test ${enable_dependency_tracking+y} +then : enableval=$enable_dependency_tracking; fi @@ -3182,11 +3805,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -3194,11 +3818,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3209,11 +3837,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -3222,11 +3850,12 @@ if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -3234,11 +3863,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3249,11 +3882,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then @@ -3261,8 +3894,8 @@ fi else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC @@ -3275,11 +3908,12 @@ if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. 
set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -3287,11 +3921,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3302,11 +3940,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -3315,11 +3953,12 @@ fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -3328,15 +3967,19 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3352,18 +3995,18 @@ if test $ac_prog_rejected = yes; then # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -3374,11 +4017,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -3386,11 +4030,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3401,11 +4049,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -3418,11 +4066,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -3430,11 +4079,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -3445,11 +4098,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -3461,34 +4114,138 @@ done else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi +else + CC="$ac_cv_prog_CC" fi fi -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. 
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 -for ac_option in --version -v -V -qversion; do +for ac_option in --version -v -V -qversion -version; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -3498,7 +4255,7 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done @@ -3506,7 +4263,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; @@ -3518,9 +4275,9 @@ ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... 
" >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" @@ -3541,11 +4298,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, @@ -3562,7 +4320,7 @@ do # certainly right. break;; *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi @@ -3578,44 +4336,46 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else +else $as_nop ac_file='' fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; 
} -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with @@ -3629,15 +4389,15 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext @@ -3646,7 +4406,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int -main () +main (void) { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; @@ -3658,8 +4418,8 @@ _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in @@ -3667,10 +4427,10 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in @@ -3678,39 +4438,40 @@ $as_echo "$ac_try_echo"; } >&5 *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; @@ -3724,11 +4485,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in @@ -3737,31 +4499,32 @@ $as_echo "$ac_try_echo"; } >&5 break;; esac done -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... 
" >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { #ifndef __GNUC__ choke me @@ -3771,29 +4534,33 @@ main () return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_compiler_gnu=yes -else +else $as_nop ac_compiler_gnu=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi -ac_test_CFLAGS=${CFLAGS+set} +ac_test_CFLAGS=${CFLAGS+y} ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no @@ -3802,57 +4569,60 @@ else /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes -else +else $as_nop CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -else +else $as_nop ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then @@ -3867,94 +4637,144 @@ else CFLAGS= fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else 
$as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c89_program _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : + if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_c89=$ac_arg fi -rm -f core conftest.err conftest.$ac_objext +rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC - fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none 
needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi fi ac_ext=c @@ -3963,21 +4783,23 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -ac_ext=c + + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 -$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } -if ${am_cv_prog_cc_c_o+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +printf %s "checking whether $CC understands -c and -o together... " >&6; } +if test ${am_cv_prog_cc_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; @@ -4005,8 +4827,8 @@ _ACEOF rm -f core conftest* unset am_i fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 -$as_echo "$am_cv_prog_cc_c_o" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. @@ -4024,11 +4846,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CC" am_compiler_list= -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 -$as_echo_n "checking dependency style of $depcc... 
" >&6; } -if ${am_cv_CC_dependencies_compiler_type+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CC_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For @@ -4135,8 +4958,8 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 -$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if @@ -4150,182 +4973,6 @@ else fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5 -$as_echo_n "checking for $CC option to accept ISO C99... " >&6; } -if ${ac_cv_prog_cc_c99+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c99=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#include -#include -#include - -// Check varargs macros. These examples are taken from C99 6.10.3.5. -#define debug(...) fprintf (stderr, __VA_ARGS__) -#define showlist(...) puts (#__VA_ARGS__) -#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) -static void -test_varargs_macros (void) -{ - int x = 1234; - int y = 5678; - debug ("Flag"); - debug ("X = %d\n", x); - showlist (The first, second, and third items.); - report (x>y, "x is %d but y is %d", x, y); -} - -// Check long long types. 
-#define BIG64 18446744073709551615ull -#define BIG32 4294967295ul -#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) -#if !BIG_OK - your preprocessor is broken; -#endif -#if BIG_OK -#else - your preprocessor is broken; -#endif -static long long int bignum = -9223372036854775807LL; -static unsigned long long int ubignum = BIG64; - -struct incomplete_array -{ - int datasize; - double data[]; -}; - -struct named_init { - int number; - const wchar_t *name; - double average; -}; - -typedef const char *ccp; - -static inline int -test_restrict (ccp restrict text) -{ - // See if C++-style comments work. - // Iterate through items via the restricted pointer. - // Also check for declarations in for loops. - for (unsigned int i = 0; *(text+i) != '\0'; ++i) - continue; - return 0; -} - -// Check varargs and va_copy. -static void -test_varargs (const char *format, ...) -{ - va_list args; - va_start (args, format); - va_list args_copy; - va_copy (args_copy, args); - - const char *str; - int number; - float fnumber; - - while (*format) - { - switch (*format++) - { - case 's': // string - str = va_arg (args_copy, const char *); - break; - case 'd': // int - number = va_arg (args_copy, int); - break; - case 'f': // float - fnumber = va_arg (args_copy, double); - break; - default: - break; - } - } - va_end (args_copy); - va_end (args); -} - -int -main () -{ - - // Check bool. - _Bool success = false; - - // Check restrict. - if (test_restrict ("String literal") == 0) - success = true; - char *restrict newvar = "Another string"; - - // Check varargs. - test_varargs ("s, d' f .", "string", 65, 34.234); - test_varargs_macros (); - - // Check flexible array members. - struct incomplete_array *ia = - malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); - ia->datasize = 10; - for (int i = 0; i < ia->datasize; ++i) - ia->data[i] = i * 1.234; - - // Check named initializers. 
- struct named_init ni = { - .number = 34, - .name = L"Test wide string", - .average = 543.34343, - }; - - ni.number = 58; - - int dynamic_array[ni.number]; - dynamic_array[ni.number - 1] = 543; - - // work around unused variable warnings - return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' - || dynamic_array[ni.number - 1] != 543); - - ; - return 0; -} -_ACEOF -for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99 -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c99=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c99" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c99" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c99" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 -$as_echo "$ac_cv_prog_cc_c99" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c99" != xno; then : - -fi - ac_ext=c @@ -4333,40 +4980,36 @@ ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 -$as_echo_n "checking how to run the C preprocessor... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +printf %s "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. 
if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then - if ${ac_cv_prog_CPP+:} false; then : - $as_echo_n "(cached) " >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + if test ${ac_cv_prog_CPP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # Double quotes because $CC needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -4378,10 +5021,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -4391,7 +5035,8 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : break fi @@ -4403,29 +5048,24 @@ fi else ac_cv_prog_CPP=$CPP fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 -$as_echo "$CPP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +printf "%s\n" "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -4437,10 +5077,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -4450,11 +5091,12 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi @@ -4466,11 +5108,12 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 -$as_echo_n "checking for grep that handles long lines and -e... " >&6; } -if ${ac_cv_path_GREP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST @@ -4478,10 +5121,15 @@ else for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in grep ggrep; do + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP @@ -4490,13 +5138,13 @@ case `"$ac_path_GREP" --version 2>&1` in ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 - $as_echo_n 0123456789 >"conftest.in" + printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" - $as_echo 'GREP' >> "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val @@ -4524,16 +5172,17 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 -$as_echo "$ac_cv_path_GREP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 -$as_echo_n "checking for egrep... " >&6; } -if ${ac_cv_path_EGREP+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... " >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else @@ -4544,10 +5193,15 @@ else for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in egrep; do + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. 
# Check for GNU $ac_path_EGREP @@ -4556,13 +5210,13 @@ case `"$ac_path_EGREP" --version 2>&1` in ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 - $as_echo_n 0123456789 >"conftest.in" + printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" - $as_echo 'EGREP' >> "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val @@ -4591,17 +5245,18 @@ fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 -$as_echo "$ac_cv_path_EGREP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" if test $ac_cv_c_compiler_gnu = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC needs -traditional" >&5 -$as_echo_n "checking whether $CC needs -traditional... " >&6; } -if ${ac_cv_prog_gcc_traditional+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC needs -traditional" >&5 +printf %s "checking whether $CC needs -traditional... " >&6; } +if test ${ac_cv_prog_gcc_traditional+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_pattern="Autoconf.*'x'" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4609,12 +5264,13 @@ else Autoconf TIOCGETP _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "$ac_pattern" >/dev/null 2>&1; then : + $EGREP "$ac_pattern" >/dev/null 2>&1 +then : ac_cv_prog_gcc_traditional=yes -else +else $as_nop ac_cv_prog_gcc_traditional=no fi -rm -f conftest* +rm -rf conftest* if test $ac_cv_prog_gcc_traditional = no; then @@ -4624,15 +5280,16 @@ rm -f conftest* Autoconf TCGETA _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "$ac_pattern" >/dev/null 2>&1; then : + $EGREP "$ac_pattern" >/dev/null 2>&1 +then : ac_cv_prog_gcc_traditional=yes fi -rm -f conftest* +rm -rf conftest* fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_gcc_traditional" >&5 -$as_echo "$ac_cv_prog_gcc_traditional" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_gcc_traditional" >&5 +printf "%s\n" "$ac_cv_prog_gcc_traditional" >&6; } if test $ac_cv_prog_gcc_traditional = yes; then CC="$CC -traditional" fi @@ -4648,11 +5305,12 @@ test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS depcc="$CCAS" am_compiler_list= -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 -$as_echo_n "checking dependency style of $depcc... " >&6; } -if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CCAS_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. 
For @@ -4757,8 +5415,8 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 -$as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CCAS_dependencies_compiler_type" >&6; } CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type if @@ -4775,11 +5433,12 @@ fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else @@ -4787,11 +5446,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -4802,11 +5465,11 @@ fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 -$as_echo "$RANLIB" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +printf "%s\n" "$RANLIB" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -4815,11 +5478,12 @@ if test -z "$ac_cv_prog_RANLIB"; then ac_ct_RANLIB=$RANLIB # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else @@ -4827,11 +5491,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_RANLIB="ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -4842,11 +5510,11 @@ fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 -$as_echo "$ac_ct_RANLIB" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +printf "%s\n" "$ac_ct_RANLIB" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi if test "x$ac_ct_RANLIB" = x; then @@ -4854,8 +5522,8 @@ fi else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac RANLIB=$ac_ct_RANLIB @@ -4864,6 +5532,12 @@ else RANLIB="$ac_cv_prog_RANLIB" fi + + + + + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4874,15 +5548,16 @@ if test -z "$CXX"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then - for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so 
it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CXX+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else @@ -4890,11 +5565,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -4905,11 +5584,11 @@ fi fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 -$as_echo "$CXX" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +printf "%s\n" "$CXX" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -4918,15 +5597,16 @@ fi fi if test -z "$CXX"; then ac_ct_CXX=$CXX - for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_prog", so it can be a program name with args. 
set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CXX+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else @@ -4934,11 +5614,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CXX="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -4949,11 +5633,11 @@ fi fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 -$as_echo "$ac_ct_CXX" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +printf "%s\n" "$ac_ct_CXX" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -4965,8 +5649,8 @@ done else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf 
"%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CXX=$ac_ct_CXX @@ -4976,7 +5660,7 @@ fi fi fi # Provide some information about the compiler. -$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do @@ -4986,7 +5670,7 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -4996,20 +5680,21 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 -$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; } -if ${ac_cv_cxx_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C++" >&5 +printf %s "checking whether the compiler supports GNU C++... " >&6; } +if test ${ac_cv_cxx_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { #ifndef __GNUC__ choke me @@ -5019,29 +5704,33 @@ main () return 0; } _ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : ac_compiler_gnu=yes -else +else $as_nop ac_compiler_gnu=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 -$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_cxx_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + if test $ac_compiler_gnu = yes; then GXX=yes else GXX= fi -ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_test_CXXFLAGS=${CXXFLAGS+y} ac_save_CXXFLAGS=$CXXFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 -$as_echo_n "checking whether $CXX accepts -g... " >&6; } -if ${ac_cv_prog_cxx_g+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +printf %s "checking whether $CXX accepts -g... " >&6; } +if test ${ac_cv_prog_cxx_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no @@ -5050,57 +5739,60 @@ else /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : ac_cv_prog_cxx_g=yes -else +else $as_nop CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : -else +else $as_nop ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : ac_cv_prog_cxx_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 -$as_echo "$ac_cv_prog_cxx_g" >&6; } -if test "$ac_test_CXXFLAGS" = set; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +printf "%s\n" "$ac_cv_prog_cxx_g" >&6; } +if test $ac_test_CXXFLAGS; then CXXFLAGS=$ac_save_CXXFLAGS elif test $ac_cv_prog_cxx_g = yes; then if test "$GXX" = yes; then @@ -5115,6 +5807,100 @@ else CXXFLAGS= fi fi +ac_prog_cxx_stdcxx=no +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 +printf %s "checking for $CXX option to enable C++11 features... " >&6; } +if test ${ac_cv_prog_cxx_11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_11=no +ac_save_CXX=$CXX +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_cxx_conftest_cxx11_program +_ACEOF +for ac_arg in '' -std=gnu++11 -std=gnu++0x -std=c++11 -std=c++0x -qlanglvl=extended0x -AA +do + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx11" != "xno" && break +done +rm -f conftest.$ac_ext +CXX=$ac_save_CXX +fi + +if test "x$ac_cv_prog_cxx_cxx11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx11" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx11" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11 + ac_prog_cxx_stdcxx=cxx11 +fi +fi +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 +printf %s "checking for $CXX option to enable C++98 features... " >&6; } +if test ${ac_cv_prog_cxx_98+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_98=no +ac_save_CXX=$CXX +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_cxx_conftest_cxx98_program +_ACEOF +for ac_arg in '' -std=gnu++98 -std=c++98 -qlanglvl=extended -AA +do + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx98=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx98" != "xno" && break +done +rm -f conftest.$ac_ext +CXX=$ac_save_CXX +fi + +if test "x$ac_cv_prog_cxx_cxx98" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx98" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx98" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx98" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98 + ac_prog_cxx_stdcxx=cxx98 +fi +fi + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -5123,11 +5909,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CXX" am_compiler_list= -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 -$as_echo_n "checking dependency style of $depcc... " >&6; } -if ${am_cv_CXX_dependencies_compiler_type+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CXX_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. 
For @@ -5234,8 +6021,8 @@ else fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 -$as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CXX_dependencies_compiler_type" >&6; } CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type if @@ -5245,288 +6032,334 @@ CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type am__fastdepCXX_FALSE='#' else am__fastdepCXX_TRUE='#' - am__fastdepCXX_FALSE= -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 -$as_echo_n "checking for ANSI C header files... " >&6; } -if ${ac_cv_header_stdc+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#include -#include - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_header_stdc=yes -else - ac_cv_header_stdc=no + am__fastdepCXX_FALSE= fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "memchr" >/dev/null 2>&1; then : -else - ac_cv_header_stdc=no -fi -rm -f conftest* +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done -fi -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. 
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "free" >/dev/null 2>&1; then : -else - ac_cv_header_stdc=no -fi -rm -f conftest* -fi -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. - if test "$cross_compiling" = yes; then : - : -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#if ((' ' & 0x0FF) == 0x020) -# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) -#else -# define ISLOWER(c) \ - (('a' <= (c) && (c) <= 'i') \ - || ('j' <= (c) && (c) <= 'r') \ - || ('s' <= (c) && (c) <= 'z')) -# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) -#endif -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int -main () -{ - int i; - for (i = 0; i < 256; i++) - if (XOR (islower (i), ISLOWER (i)) - || toupper (i) != TOUPPER (i)) - return 2; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : -else - ac_cv_header_stdc=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 -$as_echo "$ac_cv_header_stdc" >&6; } -if test $ac_cv_header_stdc = yes; then +# Autoupdate added the next two lines to ensure that your configure +# script's behavior did not change. They are probably safe to remove. -$as_echo "#define STDC_HEADERS 1" >>confdefs.h +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... 
" >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP fi -# On IRIX 5.3, sys/types and inttypes.h are conflicting. 
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ - inttypes.h stdint.h unistd.h -do : - as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` -ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default -" -if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF - + fi fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" -done -for ac_header in sys/endian.h sys/param.h syslog.h -do : - as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` -ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" -if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF +ac_fn_c_check_header_compile "$LINENO" "sys/endian.h" "ac_cv_header_sys_endian_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_endian_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_ENDIAN_H 1" >>confdefs.h fi +ac_fn_c_check_header_compile "$LINENO" "sys/param.h" "ac_cv_header_sys_param_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_param_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_PARAM_H 1" >>confdefs.h -done +fi +ac_fn_c_check_header_compile "$LINENO" "syslog.h" "ac_cv_header_syslog_h" "$ac_includes_default" +if test "x$ac_cv_header_syslog_h" = xyes +then : + printf "%s\n" "#define HAVE_SYSLOG_H 1" >>confdefs.h + +fi # sys/sysctl.h requires sys/types.h on FreeBSD # sys/sysctl.h requires sys/param.h on OpenBSD -for ac_header in sys/sysctl.h -do : - ac_fn_c_check_header_compile "$LINENO" "sys/sysctl.h" "ac_cv_header_sys_sysctl_h" "#include +ac_fn_c_check_header_compile "$LINENO" "sys/sysctl.h" "ac_cv_header_sys_sysctl_h" "#include #ifdef HAVE_SYS_PARAM_H #include #endif " -if test "x$ac_cv_header_sys_sysctl_h" = 
xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_SYS_SYSCTL_H 1 -_ACEOF +if test "x$ac_cv_header_sys_sysctl_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_SYSCTL_H 1" >>confdefs.h fi -done +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC options needed to detect all undeclared functions" >&5 +printf %s "checking for $CC options needed to detect all undeclared functions... " >&6; } +if test ${ac_cv_c_undeclared_builtin_options+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_CFLAGS=$CFLAGS + ac_cv_c_undeclared_builtin_options='cannot detect' + for ac_arg in '' -fno-builtin; do + CFLAGS="$ac_save_CFLAGS $ac_arg" + # This test program should *not* compile successfully. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +(void) strchr; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + # This test program should compile successfully. + # No library function is consistently available on + # freestanding implementations, so test against a dummy + # declaration. Include always-available headers on the + # off chance that they somehow elicit warnings. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include +extern void ac_decl (int, char *); + +int +main (void) +{ +(void) ac_decl (0, (char *) 0); + (void) ac_decl; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_arg" = x +then : + ac_cv_c_undeclared_builtin_options='none needed' +else $as_nop + ac_cv_c_undeclared_builtin_options=$ac_arg +fi + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done + CFLAGS=$ac_save_CFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 +printf "%s\n" "$ac_cv_c_undeclared_builtin_options" >&6; } + case $ac_cv_c_undeclared_builtin_options in #( + 'cannot detect') : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot make $CC report undeclared builtins +See \`config.log' for more details" "$LINENO" 5; } ;; #( + 'none needed') : + ac_c_undeclared_builtin_options='' ;; #( + *) : + ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; +esac -ac_fn_c_check_decl "$LINENO" "be32dec" "ac_cv_have_decl_be32dec" "$ac_includes_default +ac_fn_check_decl "$LINENO" "be32dec" "ac_cv_have_decl_be32dec" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test "x$ac_cv_have_decl_be32dec" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_be32dec" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_BE32DEC $ac_have_decl -_ACEOF -ac_fn_c_check_decl "$LINENO" "le32dec" "ac_cv_have_decl_le32dec" "$ac_includes_default +printf "%s\n" "#define HAVE_DECL_BE32DEC $ac_have_decl" >>confdefs.h +ac_fn_check_decl "$LINENO" "le32dec" "ac_cv_have_decl_le32dec" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test 
"x$ac_cv_have_decl_le32dec" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_le32dec" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_LE32DEC $ac_have_decl -_ACEOF -ac_fn_c_check_decl "$LINENO" "be32enc" "ac_cv_have_decl_be32enc" "$ac_includes_default +printf "%s\n" "#define HAVE_DECL_LE32DEC $ac_have_decl" >>confdefs.h +ac_fn_check_decl "$LINENO" "be32enc" "ac_cv_have_decl_be32enc" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test "x$ac_cv_have_decl_be32enc" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_be32enc" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_BE32ENC $ac_have_decl -_ACEOF -ac_fn_c_check_decl "$LINENO" "le32enc" "ac_cv_have_decl_le32enc" "$ac_includes_default +printf "%s\n" "#define HAVE_DECL_BE32ENC $ac_have_decl" >>confdefs.h +ac_fn_check_decl "$LINENO" "le32enc" "ac_cv_have_decl_le32enc" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test "x$ac_cv_have_decl_le32enc" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_le32enc" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_LE32ENC $ac_have_decl -_ACEOF -ac_fn_c_check_decl "$LINENO" "le16dec" "ac_cv_have_decl_le16dec" "$ac_includes_default +printf "%s\n" "#define HAVE_DECL_LE32ENC $ac_have_decl" >>confdefs.h +ac_fn_check_decl "$LINENO" "le16dec" "ac_cv_have_decl_le16dec" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test "x$ac_cv_have_decl_le16dec" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_le16dec" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_LE16DEC $ac_have_decl -_ACEOF 
-ac_fn_c_check_decl "$LINENO" "le16enc" "ac_cv_have_decl_le16enc" "$ac_includes_default +printf "%s\n" "#define HAVE_DECL_LE16DEC $ac_have_decl" >>confdefs.h +ac_fn_check_decl "$LINENO" "le16enc" "ac_cv_have_decl_le16enc" "$ac_includes_default #ifdef HAVE_SYS_ENDIAN_H #include #endif -" -if test "x$ac_cv_have_decl_le16enc" = xyes; then : +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_le16enc" = xyes +then : ac_have_decl=1 -else +else $as_nop ac_have_decl=0 fi - -cat >>confdefs.h <<_ACEOF -#define HAVE_DECL_LE16ENC $ac_have_decl -_ACEOF +printf "%s\n" "#define HAVE_DECL_LE16ENC $ac_have_decl" >>confdefs.h ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" -if test "x$ac_cv_type_size_t" = xyes; then : +if test "x$ac_cv_type_size_t" = xyes +then : -else +else $as_nop -cat >>confdefs.h <<_ACEOF -#define size_t unsigned int -_ACEOF +printf "%s\n" "#define size_t unsigned int" >>confdefs.h fi # The Ultrix 4.2 mips builtin alloca declared by alloca.h only works # for constant arguments. Useless! -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 -$as_echo_n "checking for working alloca.h... " >&6; } -if ${ac_cv_working_alloca_h+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 +printf %s "checking for working alloca.h... " >&6; } +if test ${ac_cv_working_alloca_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include int -main () +main (void) { char *p = (char *) alloca (2 * sizeof (int)); if (p) return 0; @@ -5534,52 +6367,52 @@ char *p = (char *) alloca (2 * sizeof (int)); return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_working_alloca_h=yes -else +else $as_nop ac_cv_working_alloca_h=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 -$as_echo "$ac_cv_working_alloca_h" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 +printf "%s\n" "$ac_cv_working_alloca_h" >&6; } if test $ac_cv_working_alloca_h = yes; then -$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h +printf "%s\n" "#define HAVE_ALLOCA_H 1" >>confdefs.h fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 -$as_echo_n "checking for alloca... " >&6; } -if ${ac_cv_func_alloca_works+:} false; then : - $as_echo_n "(cached) " >&6 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 +printf %s "checking for alloca... " >&6; } +if test ${ac_cv_func_alloca_works+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test $ac_cv_working_alloca_h = yes; then + ac_cv_func_alloca_works=yes else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#ifdef __GNUC__ -# define alloca __builtin_alloca -#else -# ifdef _MSC_VER +#include +#include +#ifndef alloca +# ifdef __GNUC__ +# define alloca __builtin_alloca +# elif defined _MSC_VER # include # define alloca _alloca # else -# ifdef HAVE_ALLOCA_H -# include -# else -# ifdef _AIX - #pragma alloca -# else -# ifndef alloca /* predefined by HP cc +Olibcalls */ -void *alloca (size_t); -# endif -# endif +# ifdef __cplusplus +extern "C" # endif +void *alloca (size_t); # endif #endif int -main () +main (void) { char *p = (char *) alloca (1); if (p) return 0; @@ -5587,20 +6420,22 @@ char *p = (char *) alloca (1); return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_func_alloca_works=yes -else +else $as_nop ac_cv_func_alloca_works=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 -$as_echo "$ac_cv_func_alloca_works" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 +printf "%s\n" "$ac_cv_func_alloca_works" >&6; } +fi if test $ac_cv_func_alloca_works = yes; then -$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h +printf "%s\n" "#define HAVE_ALLOCA 1" >>confdefs.h else # The SVR3 libPW and SVR4 libucb both contain incompatible functions @@ -5610,58 +6445,19 @@ else ALLOCA=\${LIBOBJDIR}alloca.$ac_objext -$as_echo "#define C_ALLOCA 1" >>confdefs.h - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 -$as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } -if ${ac_cv_os_cray+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#if defined CRAY && ! 
defined CRAY2 -webecray -#else -wenotbecray -#endif - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "webecray" >/dev/null 2>&1; then : - ac_cv_os_cray=yes -else - ac_cv_os_cray=no -fi -rm -f conftest* - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 -$as_echo "$ac_cv_os_cray" >&6; } -if test $ac_cv_os_cray = yes; then - for ac_func in _getb67 GETB67 getb67; do - as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` -ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" -if eval test \"x\$"$as_ac_var"\" = x"yes"; then : - -cat >>confdefs.h <<_ACEOF -#define CRAY_STACKSEG_END $ac_func -_ACEOF - - break -fi +printf "%s\n" "#define C_ALLOCA 1" >>confdefs.h - done -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 -$as_echo_n "checking stack direction for C alloca... " >&6; } -if ${ac_cv_c_stack_direction+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 +printf %s "checking stack direction for C alloca... " >&6; } +if test ${ac_cv_c_stack_direction+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test "$cross_compiling" = yes +then : ac_cv_c_stack_direction=0 -else +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ $ac_includes_default @@ -5682,9 +6478,10 @@ main (int argc, char **argv) return find_stack_direction (0, argc + !argv + 20) < 0; } _ACEOF -if ac_fn_c_try_run "$LINENO"; then : +if ac_fn_c_try_run "$LINENO" +then : ac_cv_c_stack_direction=1 -else +else $as_nop ac_cv_c_stack_direction=-1 fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ @@ -5692,25 +6489,19 @@ rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 -$as_echo "$ac_cv_c_stack_direction" >&6; } -cat >>confdefs.h <<_ACEOF -#define STACK_DIRECTION $ac_cv_c_stack_direction -_ACEOF +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 +printf "%s\n" "$ac_cv_c_stack_direction" >&6; } +printf "%s\n" "#define STACK_DIRECTION $ac_cv_c_stack_direction" >>confdefs.h fi -for ac_func in getopt_long -do : - ac_fn_c_check_func "$LINENO" "getopt_long" "ac_cv_func_getopt_long" -if test "x$ac_cv_func_getopt_long" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_GETOPT_LONG 1 -_ACEOF +ac_fn_c_check_func "$LINENO" "getopt_long" "ac_cv_func_getopt_long" +if test "x$ac_cv_func_getopt_long" = xyes +then : + printf "%s\n" "#define HAVE_GETOPT_LONG 1" >>confdefs.h fi -done MINGW_TARGET=`$CC -dumpmachine 2>&1` @@ -5742,137 +6533,143 @@ case $MINGW_TARGET in esac # Check whether --enable-assembly was given. -if test "${enable_assembly+set}" = set; then : +if test ${enable_assembly+y} +then : enableval=$enable_assembly; fi if test x$enable_assembly != xno; then -$as_echo "#define USE_ASM 1" >>confdefs.h +printf "%s\n" "#define USE_ASM 1" >>confdefs.h fi if test x$enable_assembly != xno -a x$have_x86_64 = xtrue then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX code" >&5 -$as_echo_n "checking whether we can compile AVX code... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX code" >&5 +printf %s "checking whether we can compile AVX code... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { asm ("vmovdqa %ymm0, %ymm1"); ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -$as_echo "#define USE_AVX 1" >>confdefs.h +printf "%s\n" "#define USE_AVX 1" >>confdefs.h - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile XOP code" >&5 -$as_echo_n "checking whether we can compile XOP code... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile XOP code" >&5 +printf %s "checking whether we can compile XOP code... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { asm ("vprotd \$7, %xmm0, %xmm1"); ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -$as_echo "#define USE_XOP 1" >>confdefs.h +printf "%s\n" "#define USE_XOP 1" >>confdefs.h - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the XOP instruction set." >&5 -$as_echo "$as_me: WARNING: The assembler does not support the XOP instruction set." >&2;} +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the XOP instruction set." 
>&5 +printf "%s\n" "$as_me: WARNING: The assembler does not support the XOP instruction set." >&2;} fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX2 code" >&5 -$as_echo_n "checking whether we can compile AVX2 code... " >&6; } +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX2 code" >&5 +printf %s "checking whether we can compile AVX2 code... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { asm ("vpaddd %ymm0, %ymm1, %ymm2"); ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -$as_echo "#define USE_AVX2 1" >>confdefs.h +printf "%s\n" "#define USE_AVX2 1" >>confdefs.h - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX512 code" >&5 -$as_echo_n "checking whether we can compile AVX512 code... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX512 code" >&5 +printf %s "checking whether we can compile AVX512 code... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}"); ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -$as_echo "#define USE_AVX512 1" >>confdefs.h +printf "%s\n" "#define USE_AVX512 1" >>confdefs.h - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX512 instruction set." >&5 -$as_echo "$as_me: WARNING: The assembler does not support the AVX512 instruction set." >&2;} +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX512 instruction set." >&5 +printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX512 instruction set." >&2;} fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX2 instruction set." >&5 -$as_echo "$as_me: WARNING: The assembler does not support the AVX2 instruction set." >&2;} +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX2 instruction set." >&5 +printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX2 instruction set." 
>&2;} fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX instruction set." >&5 -$as_echo "$as_me: WARNING: The assembler does not support the AVX instruction set." >&2;} +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX instruction set." >&5 +printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX instruction set." >&2;} fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for json_loads in -ljansson" >&5 -$as_echo_n "checking for json_loads in -ljansson... " >&6; } -if ${ac_cv_lib_jansson_json_loads+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for json_loads in -ljansson" >&5 +printf %s "checking for json_loads in -ljansson... " >&6; } +if test ${ac_cv_lib_jansson_json_loads+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ljansson $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -5881,32 +6678,31 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char json_loads (); int -main () +main (void) { return json_loads (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_jansson_json_loads=yes -else +else $as_nop ac_cv_lib_jansson_json_loads=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_jansson_json_loads" >&5 -$as_echo "$ac_cv_lib_jansson_json_loads" >&6; } -if test "x$ac_cv_lib_jansson_json_loads" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_jansson_json_loads" >&5 +printf "%s\n" "$ac_cv_lib_jansson_json_loads" >&6; } +if test "x$ac_cv_lib_jansson_json_loads" = xyes +then : request_jansson=false -else +else $as_nop request_jansson=true fi @@ -5914,11 +6710,12 @@ fi # GC2 for GNU static if test "x$have_win32" = "xtrue" ; then # MinGW - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 -$as_echo_n "checking for pthread_create in -lpthread... " >&6; } -if ${ac_cv_lib_pthread_pthread_create+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +printf %s "checking for pthread_create in -lpthread... " >&6; } +if test ${ac_cv_lib_pthread_pthread_create+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lpthread $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -5927,39 +6724,39 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char pthread_create (); int -main () +main (void) { return pthread_create (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_pthread_pthread_create=yes -else +else $as_nop ac_cv_lib_pthread_pthread_create=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 -$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } -if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes +then : PTHREAD_LIBS="-lpthreadGC2" fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 -$as_echo_n "checking for pthread_create in -lpthread... " >&6; } -if ${ac_cv_lib_pthread_pthread_create+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +printf %s "checking for pthread_create in -lpthread... " >&6; } +if test ${ac_cv_lib_pthread_pthread_create+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lpthread $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -5968,30 +6765,29 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char pthread_create (); int -main () +main (void) { return pthread_create (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_pthread_pthread_create=yes -else +else $as_nop ac_cv_lib_pthread_pthread_create=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 -$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } -if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes +then : PTHREAD_LIBS="-lpthread" fi @@ -6000,32 +6796,33 @@ fi LDFLAGS="$PTHREAD_LDFLAGS $LDFLAGS" # PTHREAD_LIBS="$PTHREAD_LIBS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __uint128_t is supported" >&5 -$as_echo_n "checking whether __uint128_t is supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether __uint128_t is supported" >&5 +printf %s "checking whether __uint128_t is supported... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ static __uint128_t i = 100; int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -$as_echo "#define USE_INT128 1" >>confdefs.h +printf "%s\n" "#define USE_INT128 1" >>confdefs.h - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext # allow if in Makefile.am if test x$request_jansson = xtrue; then @@ -6084,6 +6881,14 @@ else MINGW_FALSE= fi + if test "x$OS" = "xAPPLE"; then + HAVE_MACOS_TRUE= + HAVE_MACOS_FALSE='#' +else + HAVE_MACOS_TRUE='#' + HAVE_MACOS_FALSE= +fi + if test x$request_jansson = xtrue ; then JANSSON_LIBS="compat/jansson/libjansson.a" @@ -6094,7 +6899,8 @@ fi # libcurl install path (for mingw : --with-curl=/usr/local) # Check whether --with-curl was given. -if test "${with_curl+set}" = set; then : +if test ${with_curl+y} +then : withval=$with_curl; fi @@ -6109,7 +6915,8 @@ fi # SSL install path (for mingw : --with-crypto=/usr/local/ssl) # Check whether --with-crypto was given. 
-if test "${with_crypto+set}" = set; then : +if test ${with_crypto+y} +then : withval=$with_crypto; fi @@ -6175,8 +6982,8 @@ _ACEOF case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -6206,15 +7013,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; /^ac_cv_env_/b end t clear :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else @@ -6228,8 +7035,8 @@ $as_echo "$as_me: updating cache $cache_file" >&6;} fi fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache @@ -6246,7 +7053,7 @@ U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. 
ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" @@ -6257,14 +7064,14 @@ LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 -$as_echo_n "checking that generated files are newer than configure... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +printf %s "checking that generated files are newer than configure... " >&6; } if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi - { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 -$as_echo "done" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5 +printf "%s\n" "done" >&6; } if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' @@ -6321,13 +7128,17 @@ if test -z "${MINGW_TRUE}" && test -z "${MINGW_FALSE}"; then as_fn_error $? "conditional \"MINGW\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_MACOS_TRUE}" && test -z "${HAVE_MACOS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MACOS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! 
$SHELL @@ -6350,14 +7161,16 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -6367,46 +7180,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. 
-if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. 
-if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -6415,13 +7228,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -6430,8 +7236,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -6443,30 +7253,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. 
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] @@ -6479,13 +7269,14 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -6512,18 +7303,20 @@ as_fn_unset () { eval $1=; unset $1;} } as_unset=as_fn_unset + # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -6535,12 +7328,13 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` @@ -6571,7 +7365,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -6593,6 +7387,10 @@ as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -6606,6 +7404,12 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -6647,7 +7451,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -6656,7 +7460,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -6718,8 +7522,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 23.7, which was -generated by GNU Autoconf 2.69. Invocation command line was +This file was extended by cpuminer-opt $as_me 23.8, which was +generated by GNU Autoconf 2.71. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -6781,14 +7585,16 @@ $config_commands Report bugs to the package provider." _ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -cpuminer-opt config.status 23.7 -configured by $0, generated by GNU Autoconf 2.69, +cpuminer-opt config.status 23.8 +configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." @@ -6828,15 +7634,15 @@ do -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; + printf "%s\n" "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; + printf "%s\n" "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? 
"missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" @@ -6844,7 +7650,7 @@ do --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; @@ -6853,7 +7659,7 @@ do as_fn_error $? "ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; + printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; @@ -6881,7 +7687,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" @@ -6895,7 +7701,7 @@ exec 5>>config.log sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX - $as_echo "$ac_log" + printf "%s\n" "$ac_log" } >&5 _ACEOF @@ -6929,9 +7735,9 @@ done # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files - test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers - test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers + test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. 
Make it in the build tree @@ -7267,7 +8073,7 @@ do esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done @@ -7275,17 +8081,17 @@ do # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | + ac_sed_conf_input=`printf "%s\n" "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac @@ -7302,7 +8108,7 @@ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$ac_file" | +printf "%s\n" X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -7326,9 +8132,9 @@ $as_echo X"$ac_file" | case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. 
- ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -7390,8 +8196,8 @@ ac_sed_dataroot=' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' @@ -7435,9 +8241,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -7453,20 +8259,20 @@ which seems to be undefined. 
Please make sure it is defined" >&2;} # if test x"$ac_file" != x-; then { - $as_echo "/* $configure_input */" \ + printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 -$as_echo "$as_me: $ac_file is unchanged" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else - $as_echo "/* $configure_input */" \ + printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? "could not create -" "$LINENO" 5 fi @@ -7486,7 +8292,7 @@ $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$_am_arg" : 'X\(//\)[^/]' \| \ X"$_am_arg" : 'X\(//\)$' \| \ X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$_am_arg" | +printf "%s\n" X"$_am_arg" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -7506,8 +8312,8 @@ $as_echo X"$_am_arg" | s/.*/./; q'`/stamp-h$_am_stamp_count ;; - :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 -$as_echo "$as_me: executing $ac_file commands" >&6;} + :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +printf "%s\n" "$as_me: executing $ac_file commands" >&6;} ;; esac @@ -7533,7 +8339,7 @@ esac for am_mf do # Strip MF so we end up with the name of the file. - am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile which includes # dependency-tracking related rules and includes. 
# Grep'ing the whole file directly is not great: AIX grep has a line @@ -7545,7 +8351,7 @@ $as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$am_mf" : 'X\(//\)[^/]' \| \ X"$am_mf" : 'X\(//\)$' \| \ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$am_mf" | +printf "%s\n" X"$am_mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -7567,7 +8373,7 @@ $as_echo X"$am_mf" | $as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ X"$am_mf" : 'X\(//\)$' \| \ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$am_mf" | +printf "%s\n" X/"$am_mf" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -7592,10 +8398,12 @@ $as_echo X/"$am_mf" | (exit $ac_status); } || am_rc=$? done if test $am_rc -ne 0; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "Something went wrong bootstrapping makefile fragments - for automatic dependency tracking. Try re-running configure with the + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE=\"gmake\" (or whatever is + necessary). You can also try re-running configure with the '--disable-dependency-tracking' option to at least be able to build the package (albeit without support for automatic dependency tracking). 
See \`config.log' for more details" "$LINENO" 5; } @@ -7641,7 +8449,8 @@ if test "$no_create" != yes; then $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + diff --git a/cpu-miner.c b/cpu-miner.c index 3da0c3a5..bbcd0ab6 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -36,7 +36,7 @@ #include #include #include -#include +//#include //#include #include "sysinfos.c" #include "algo/sha/sha256d.h" @@ -1967,18 +1967,6 @@ void sha256_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx ) sha256d( merkle_root, merkle_root, 64 ); } } -/* -// OpenSSL single sha256, deprecated -void SHA256_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx ) -{ - SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root ); - for ( int i = 0; i < sctx->job.merkle_count; i++ ) - { - memcpy( merkle_root + 32, sctx->job.merkle[i], 32 ); - sha256d( merkle_root, merkle_root, 64 ); - } -} -*/ // Default is do_nothing (assumed LE) void set_work_data_big_endian( struct work *work ) @@ -2212,8 +2200,8 @@ static void *miner_thread( void *userdata ) // int64_t max64 = 1000; int nonce_found = 0; - if ( likely( algo_gate.do_this_thread( thr_id ) ) ) - { +// if ( likely( algo_gate.do_this_thread( thr_id ) ) ) +// { if ( have_stratum ) { while ( unlikely( stratum_down ) ) @@ -2262,8 +2250,8 @@ static void *miner_thread( void *userdata ) pthread_rwlock_unlock( &g_work_lock ); - } // do_this_thread - algo_gate.resync_threads( thr_id, &work ); +// } // do_this_thread +// algo_gate.resync_threads( thr_id, &work ); // conditional mining if ( unlikely( 
!wanna_mine( thr_id ) ) ) @@ -3685,8 +3673,8 @@ void get_defconfig_path(char *out, size_t bufsize, char *argv0); #include "simd-utils.h" -#include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/hamsi/sph_hamsi.h" +#include "algo/echo/aes_ni/hash_api.h" +#include "compat/aes_helper.c" int main(int argc, char *argv[]) { diff --git a/miner.h b/miner.h index 2853a378..0c6d70f1 100644 --- a/miner.h +++ b/miner.h @@ -1,38 +1,41 @@ -#ifndef __MINER_H__ -#define __MINER_H__ +#ifndef MINER_H__ +#define MINER_H__ #include - #if defined(__x86_64__) - #define USER_AGENT_ARCH "x64" + #define USER_AGENT_ARCH "x64" // Intel, AMD x86_64 #elif defined(__aarch64__) - #define USER_AGENT_ARCH "arm" + #define USER_AGENT_ARCH "arm" // AArch64 +//#elif +// #define USER_AGENT_ARCH "R5" // RISC-V #else #define USER_AGENT_ARCH #endif #if defined(__linux) - #define USER_AGENT_OS "L" + #define USER_AGENT_OS "L" // GNU Linux #elif defined(WIN32) - #define USER_AGENT_OS "W" + #define USER_AGENT_OS "W" // MS Windows +#elif defined(__APPLE__) + #define USER_AGENT_OS "M" // Apple MacOS +// is there a generic BSD macro? 
+#elif defined(__unix__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + #define USER_AGENT_OS "U" // BSD unix #else #define USER_AGENT_OS #endif #define USER_AGENT PACKAGE_NAME "-" PACKAGE_VERSION "-" USER_AGENT_ARCH USER_AGENT_OS -//#define MAX_CPUS 128 - +/* #ifdef _MSC_VER -#undef USE_ASM /* to fix */ - +#undef USE_ASM #ifdef NOASM #undef USE_ASM #endif -/* missing arch defines for msvc */ #if defined(_M_X64) #define __i386__ 1 #define __x86_64__ 1 @@ -40,8 +43,8 @@ #define __i386__ 1 #endif -#endif /* _MSC_VER */ - +#endif +*/ #include #include @@ -75,7 +78,7 @@ #endif - +//TODO for windows static inline bool is_root() { #if defined(WIN32) @@ -607,7 +610,6 @@ enum algos { ALGO_GROESTL, ALGO_HEX, ALGO_HMQ1725, - ALGO_HODL, ALGO_JHA, ALGO_KECCAK, ALGO_KECCAKC, @@ -703,7 +705,6 @@ static const char* const algo_names[] = { "groestl", "hex", "hmq1725", - "hodl", "jha", "keccak", "keccakc", @@ -865,7 +866,6 @@ Options:\n\ groestl Groestl coin\n\ hex x16r-hex\n\ hmq1725 Espers\n\ - hodl Hodlcoin\n\ jha jackppot (Jackpotcoin)\n\ keccak Maxcoin\n\ keccakc Creative Coin\n\ diff --git a/simd-utils/simd-128.h b/simd-utils/simd-128.h index 5a784f10..90b9fb3f 100644 --- a/simd-utils/simd-128.h +++ b/simd-utils/simd-128.h @@ -153,10 +153,16 @@ #define v128_unpackhi8 _mm_unpackhi_epi8 // AES +// Nokey means nothing on x86_64 but it saves an instruction and a register +// on ARM. #define v128_aesenc _mm_aesenc_si128 +#define v128_aesenc_nokey(v) _mm_aesenc_si128( v, v128_zero ) #define v128_aesenclast _mm_aesenclast_si128 +#define v128_aesenclast_nokey(v) _mm_aesenclast_si128( v, v128_zero ) #define v128_aesdec _mm_aesdec_si128 +#define v128_aesdec_nokey(v) _mm_aesdec_si128( v, v128_zero ) #define v128_aesdeclast _mm_aesdeclast_si128 +#define v128_aesdeclast_nokey(v) _mm_aesdeclast_si128( v, v128_zero ) // Used instead if casting. 
typedef union @@ -499,73 +505,141 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) // // Bit rotations -// Slow bit rotation, used as last resort -#define mm128_ror_64_sse2( v, c ) \ +#define v128_shuffle16( v, c ) \ + _mm_shufflehi_epi16( _mm_shufflelo_epi16( v, c ), c ) + +#define v128_qrev32(v) _mm_shuffle_epi32( v, 0xb1 ) +#define v128_swap64_32(v) _mm_shuffle_epi32( v, 0xb1 ) // grandfathered + +#define v128_qrev16(v) v128_shuffle16( v, 0x1b ) +#define v128_lrev16(v) v128_shuffle16( v, 0xb1 ) + +// These sgould never be callled from application code, use rol/ror. +#define v128_ror64_sse2( v, c ) \ _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) ) -#define mm128_rol_64_sse2( v, c ) \ +#define v128_rol64_sse2( v, c ) \ _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) ) -#define mm128_ror_32_sse2( v, c ) \ +#define v128_ror32_sse2( v, c ) \ _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) ) -#define mm128_rol_32_sse2( v, c ) \ +#define v128_rol32_sse2( v, c ) \ _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) ) #if defined(__AVX512VL__) -#define mm128_ror_64 _mm_ror_epi64 -#define mm128_rol_64 _mm_rol_epi64 -#define mm128_ror_32 _mm_ror_epi32 -#define mm128_rol_32 _mm_rol_epi32 +// AVX512 fastest all rotations. +#define mm128_ror_64 _mm_ror_epi64 +#define mm128_rol_64 _mm_rol_epi64 +#define mm128_ror_32 _mm_ror_epi32 +#define mm128_rol_32 _mm_rol_epi32 + +// ror/rol will alway find the fastest but these names may fit better with +// application code performing shuffles rather than bit rotations. 
+#define v128_shuflr64_8( v) _mm_ror_epi64( v, 8 ) +#define v128_shufll64_8( v) _mm_rol_epi64( v, 8 ) +#define v128_shuflr64_16(v) _mm_ror_epi64( v, 16 ) +#define v128_shufll64_16(v) _mm_rol_epi64( v, 16 ) +#define v128_shuflr64_24(v) _mm_ror_epi64( v, 24 ) +#define v128_shufll64_24(v) _mm_rol_epi64( v, 24 ) +#define v128_shuflr32_8( v) _mm_ror_epi32( v, 8 ) +#define v128_shufll32_8( v) _mm_rol_epi32( v, 8 ) +#define v128_shuflr32_16(v) _mm_ror_epi32( v, 16 ) +#define v128_shufll32_16(v) _mm_rol_epi32( v, 16 ) -// optimized byte wise rotation #elif defined(__SSSE3__) +// SSE2: fastest 32 bit, very fast 16, fast 8 + +#define v128_shuflr64_8( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x080f0e0d0c0b0a09, 0x0007060504030201 ) ) + +#define v128_shufll64_8( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x0e0d0c0b0a09080f, 0x0605040302010007 ) ) + +#define v128_shuflr64_24( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x0a09080f0e0d0c0b, 0x0201000706050403 ) ) + +#define v128_shufll64_24( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x0c0b0a09080f0e0d, 0x0403020100070605 ) ) + +#define v128_shuflr32_8( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x0c0f0e0d080b0a09, 0x0407060500030201 ) ) + +#define v128_shufll32_8( v ) \ + _mm_shuffle_epi8( v, _mm_set_epi64x( \ + 0x0e0d0c0f0a09080b, 0x0605040702010003 ) ) + +#define mm128_ror_64( v, c ) \ + ( (c) == 8 ) ? v128_shuflr64_8( v ) \ + : ( (c) == 16 ) ? v128_shuffle16( v, 0x39 ) \ + : ( (c) == 24 ) ? v128_shuflr64_24( v ) \ + : ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 40 ) ? v128_shufll64_24( v ) \ + : ( (c) == 48 ) ? v128_shuffle16( v, 0x93 ) \ + : ( (c) == 56 ) ? v128_shufll64_8( v ) \ + : v128_ror64_sse2( v, c ) + +#define mm128_rol_64( v, c ) \ + ( (c) == 8 ) ? v128_shufll64_8( v ) \ + : ( (c) == 16 ) ? v128_shuffle16( v, 0x93 ) \ + : ( (c) == 24 ) ? v128_shufll64_24( v ) \ + : ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 40 ) ? 
v128_shuflr64_24( v ) \ + : ( (c) == 48 ) ? v128_shuffle16( v, 0x39 ) \ + : ( (c) == 56 ) ? v128_shuflr64_8( v ) \ + : v128_rol64_sse2( v, c ) + +#define mm128_ror_32( v, c ) \ + ( (c) == 8 ) ? v128_shuflr32_8( v ) \ + : ( (c) == 16 ) ? v128_lrev16( v ) \ + : ( (c) == 24 ) ? v128_shufll32_8( v ) \ + : v128_ror32_sse2( v, c ) + +#define mm128_rol_32( v, c ) \ + ( (c) == 8 ) ? v128_shufll32_8( v ) \ + : ( (c) == 16 ) ? v128_lrev16( v ) \ + : ( (c) == 24 ) ? v128_shuflr32_8( v ) \ + : v128_rol32_sse2( v, c ) + +#elif defined(__SSE2__) +// SSE2: fastest 32 bit, very fast 16 #define mm128_ror_64( v, c ) \ - ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ - : ( (c) == 24 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0a09080f0e0d0c0b, 0x0201000706050403 ) ) \ - : ( (c) == 16 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x09080f0e0d0c0b0a, 0x0100070605040302 ) ) \ - : ( (c) == 8 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x080f0e0d0c0b0a09, 0x0007060504030201 ) ) \ - : mm128_ror_64_sse2( v, c ) + ( (c) == 16 ) ? v128_shuffle16( v, 0x39 ) \ + : ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 48 ) ? v128_shuffle16( v, 0x93 ) \ + : v128_ror64_sse2( v, c ) #define mm128_rol_64( v, c ) \ - ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ - : ( (c) == 24 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0c0b0a09080f0e0d, 0x0403020100070605 ) ) \ - : ( (c) == 16 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0d0c0b0a09080f0e, 0x0504030201000706 ) ) \ - : ( (c) == 8 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0e0d0c0b0a09080f, 0x0605040302010007 ) ) \ - : mm128_rol_64_sse2( v, c ) + ( (c) == 16 ) ? v128_shuffle16( v, 0x93 ) \ + : ( (c) == 32 ) ? _mm_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 48 ) ? v128_shuffle16( v, 0x39 ) \ + : v128_rol64_sse2( v, c ) #define mm128_ror_32( v, c ) \ - ( (c) == 16 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0d0c0f0e09080b0a, 0x0504070601000302 ) ) \ - : ( (c) == 8 ) ? 
_mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0c0f0e0d080b0a09, 0x0407060500030201 ) ) \ - : mm128_ror_32_sse2( v, c ) + ( (c) == 16 ) ? v128_lrev16( v ) \ + : v128_ror32_sse2( v, c ) #define mm128_rol_32( v, c ) \ - ( (c) == 16 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0d0c0f0e09080b0a, 0x0504070601000302 ) ) \ - : ( (c) == 8 ) ? _mm_shuffle_epi8( v, _mm_set_epi64x( \ - 0x0e0d0c0f0a09080b, 0x0605040702010003 ) ) \ - : mm128_rol_32_sse2( v, c ) + ( (c) == 16 ) ? v128_lrev16( v ) \ + : v128_rol32_sse2( v, c ) #else -#define mm128_ror_64 mm128_ror_64_sse2 -#define mm128_rol_64 mm128_rol_64_sse2 -#define mm128_ror_32 mm128_ror_32_sse2 -#define mm128_rol_32 mm128_rol_32_sse2 +#define mm128_ror_64 v128_ror64_sse2 +#define mm128_rol_64 v128_rol64_sse2 +#define mm128_ror_32 v128_ror32_sse2 +#define mm128_rol_32 v128_rol32_sse2 #endif -// Architecturally agnostic naming +// Generic names for portable code #define v128_ror64 mm128_ror_64 #define v128_rol64 mm128_rol_64 #define v128_ror32 mm128_ror_32 @@ -669,9 +743,6 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) // Rotate vector elements accross all lanes -#define v128_shuffle16( v, c ) \ - _mm_or_si128( _mm_shufflehi_epi16( v, c ), _mm_shufflelo_epi16( v, c ) ) - // reverse elements in vector #define v128_swap64(v) _mm_shuffle_epi32( v, 0x4e ) // grandfathered #define v128_rev64(v) _mm_shuffle_epi32( v, 0x4e ) // preferred @@ -685,24 +756,12 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) #define v128_shuflr16(v) v128_shuffle16( v, 0x39 ) #define v128_shufll16(v) v128_shuffle16( v, 0x93 ) -// Some sub-vector shuffles are identical to bit rotation. Shuffle is faster. -// Bit rotation already promotes faster widths. Usage of these versions -// are context sensitive. 
- -// reverse elements in vector lanes -#define v128_qrev32(v) v128_ror64( v, 32 ) -#define v128_swap64_32(v) v128_ror64( v, 32 ) // grandfathered - -#define v128_qrev16(v) \ - _mm_or_si128( _mm_shufflehi_epi16( v, v128u16( 0x1b ) ) \ - _mm_shufflelo_epi16( v, v128u16( 0x1b ) ) ) - -#define v128_lrev16(v) v128_ror32( v, 16 ) +//TODO fix this // alias bswap -#define v128_qrev8(v) _mm_shuffle_epi8( v, v128_8( 0,1,2,3,4,5,6,7 ) ) -#define v128_lrev8(v) _mm_shuffle_epi8( v, v128_8( 4,5,6,7, 0,1,2,3 ) ) -#define v128_wrev8(v) _mm_shuffle_epi8( v, v128_8( 6,7, 4,5, 2,3, 1,0 ) ) +//#define v128_qrev8(v) _mm_shuffle_epi8( v, v128_8( 0,1,2,3,4,5,6,7 ) ) +//#define v128_lrev8(v) _mm_shuffle_epi8( v, v128_8( 4,5,6,7, 0,1,2,3 ) ) +//#define v128_wrev8(v) _mm_shuffle_epi8( v, v128_8( 6,7, 4,5, 2,3, 1,0 ) ) // reverse bits, can it be done? //#define v128_bitrev8( v ) vrbitq_u8 @@ -790,6 +849,16 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c ) #define mm128_block_bswap32_256 mm128_block_bswap_32 #define v128_block_bswap32_256 mm128_block_bswap_32 + +#define mm128_block_bswap32_128( d, s ) \ +{ \ + __m128i ctl = _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ); \ + casti_m128i( d,0 ) = _mm_shuffle_epi8( casti_m128i( s,0 ), ctl ); \ + casti_m128i( d,1 ) = _mm_shuffle_epi8( casti_m128i( s,1 ), ctl ); \ + casti_m128i( d,2 ) = _mm_shuffle_epi8( casti_m128i( s,2 ), ctl ); \ + casti_m128i( d,3 ) = _mm_shuffle_epi8( casti_m128i( s,3 ), ctl ); \ +} + #define v128_block_bswap32_512( d, s ) \ { \ __m128i ctl = _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ); \ diff --git a/simd-utils/simd-256.h b/simd-utils/simd-256.h index 4174ef71..64895229 100644 --- a/simd-utils/simd-256.h +++ b/simd-utils/simd-256.h @@ -218,7 +218,29 @@ static inline __m256i mm256_not( const __m256i v ) // // Bit rotations. 
-// Slow version, used as last resort +#define mm256_shuffle16( v, c ) \ + _mm256_shufflehi_epi16( _mm256_shufflelo_epi16( v, c ), c ) + +#define mm256_qrev32(v) _mm256_shuffle_epi32( v, 0xb1 ) +#define mm256_swap64_32 mm256_qrev32 // grandfathered + +#define mm256_qrev16(v) mm256_shuffle16( v, 0x1b ) + +#define mm256_qrev8(v) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + v128_64( 0x08090a0b0c0d0e0f, 0x0001020304050607 ) ) ) + +#define mm256_lrev16(v) mm256_shuffle16( v, 0xb1 ) + +#define mm256_lrev8(v) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + v128_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 ) ) ) + +#define mm256_wrev8(v) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + v128_64( 0x0e0f0c0d0a0b0809, 0x0607040502030001 ) ) ) + +// These should never be called directly by applications. #define mm256_ror_64_avx2( v, c ) \ _mm256_or_si256( _mm256_srli_epi64( v, c ), \ _mm256_slli_epi64( v, 64-(c) ) ) @@ -242,40 +264,76 @@ static inline __m256i mm256_not( const __m256i v ) #define mm256_ror_32 _mm256_ror_epi32 #define mm256_rol_32 _mm256_rol_epi32 +// Redundant but naming may be a better fit in some applications. +#define mm126_shuflr64_8( v) _mm256_ror_epi64( v, 8 ) +#define mm156_shufll64_8( v) _mm256_rol_epi64( v, 8 ) +#define mm256_shuflr64_16(v) _mm256_ror_epi64( v, 16 ) +#define mm256_shufll64_16(v) _mm256_rol_epi64( v, 16 ) +#define mm256_shuflr64_24(v) _mm256_ror_epi64( v, 24 ) +#define mm256_shufll64_24(v) _mm256_rol_epi64( v, 24 ) +#define mm256_shuflr32_8( v) _mm256_ror_epi32( v, 8 ) +#define mm256_shufll32_8( v) _mm256_rol_epi32( v, 8 ) +#define mm256_shuflr32_16(v) _mm256_ror_epi32( v, 16 ) +#define mm256_shufll32_16(v) _mm256_rol_epi32( v, 16 ) + #else +// ROR & ROL will always find the fastest but these names may be a better fit +// in some applications. 
+#define mm256_shuflr64_8( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x080f0e0d0c0b0a09, 0x0007060504030201 ) ) ) + +#define mm256_shufll64_8( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x0e0d0c0b0a09080f, 0x0605040302010007 ) ) ) + +#define mm256_shuflr64_24( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x0a09080f0e0d0c0b, 0x0201000706050403 ) ) ) + +#define mm256_shufll64_24( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x0c0b0a09080f0e0d, 0x0403020100070605 ) ) ) + +#define mm256_shuflr32_8( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x0c0f0e0d080b0a09, 0x0407060500030201 ) ) ) + +#define mm256_shufll32_8( v ) \ + _mm256_shuffle_epi8( v, mm256_bcast_m128( \ + _mm_set_epi64x( 0x0e0d0c0f0a09080b, 0x0605040702010003 ) ) ) + #define mm256_ror_64( v, c ) \ - ( (c) == 32 ) ? _mm256_shuffle_epi32( v, 0xb1 ) \ - : ( (c) == 24 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0a09080f0e0d0c0b, 0x0201000706050403 ) ) ) \ - : ( (c) == 16 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x09080f0e0d0c0b0a, 0x0100070605040302 ) ) ) \ - : ( (c) == 8 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x080f0e0d0c0b0a09, 0x0007060504030201 ) ) ) \ + ( (c) == 8 ) ? mm256_shuflr64_8( v ) \ + : ( (c) == 16 ) ? mm256_shuffle16( v, 0x39 ) \ + : ( (c) == 24 ) ? mm256_shuflr64_24( v ) \ + : ( (c) == 32 ) ? _mm256_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 40 ) ? mm256_shufll64_24( v ) \ + : ( (c) == 48 ) ? mm256_shuffle16( v, 0x93 ) \ + : ( (c) == 56 ) ? mm256_shufll64_8( v ) \ : mm256_ror_64_avx2( v, c ) #define mm256_rol_64( v, c ) \ - ( (c) == 32 ) ? _mm256_shuffle_epi32( v, 0xb1 ) \ - : ( (c) == 24 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0c0b0a09080f0e0d, 0x0403020100070605 ) ) ) \ - : ( (c) == 16 ) ? 
_mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0d0c0b0a09080f0e, 0x0504030201000706 ) ) ) \ - : ( (c) == 8 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0e0d0c0b0a09080f, 0x0605040302010007 ) ) ) \ + ( (c) == 8 ) ? mm256_shufll64_8( v ) \ + : ( (c) == 16 ) ? mm256_shuffle16( v, 0x93 ) \ + : ( (c) == 24 ) ? mm256_shufll64_24( v ) \ + : ( (c) == 32 ) ? _mm256_shuffle_epi32( v, 0xb1 ) \ + : ( (c) == 40 ) ? mm256_shuflr64_24( v ) \ + : ( (c) == 48 ) ? mm256_shuffle16( v, 0x39 ) \ + : ( (c) == 56 ) ? mm256_shuflr64_8( v ) \ : mm256_rol_64_avx2( v, c ) #define mm256_ror_32( v, c ) \ - ( (c) == 16 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0d0c0f0e09080b0a, 0x0504070601000302 ) ) )\ - : ( (c) == 8 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0c0f0e0d080b0a09, 0x0407060500030201 ) ) ) \ + ( (c) == 8 ) ? mm256_shuflr32_8( v ) \ + : ( (c) == 16 ) ? mm256_lrev16( v ) \ + : ( (c) == 24 ) ? mm256_shufll32_8( v ) \ : mm256_ror_32_avx2( v, c ) #define mm256_rol_32( v, c ) \ - ( (c) == 16 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0d0c0f0e09080b0a, 0x0504070601000302 ) ) ) \ - : ( (c) == 8 ) ? _mm256_shuffle_epi8( v, mm256_bcast_m128( \ - _mm_set_epi64x( 0x0e0d0c0f0a09080b, 0x0605040702010003 ) ) ) \ + ( (c) == 8 ) ? mm256_shufll32_8( v ) \ + : ( (c) == 16 ) ? mm256_lrev16( v ) \ + : ( (c) == 24 ) ? mm256_shuflr32_8( v ) \ : mm256_rol_32_avx2( v, c ) #endif @@ -400,25 +458,19 @@ static inline __m256i mm256_not( const __m256i v ) /* Not used // Rotate 256 bit vector by one 32 bit element. 
#if defined(__AVX512VL__) - static inline __m256i mm256_shuflr_32( const __m256i v ) { return _mm256_alignr_epi32( v, v, 1 ); } - static inline __m256i mm256_shufll_32( const __m256i v ) { return _mm256_alignr_epi32( v, v, 15 ); } - #else - #define mm256_shuflr_32( v ) \ _mm256_permutevar8x32_epi32( v, \ _mm256_set_spi64x( 0x0000000000000007, 0x0000000600000005, \ 0x0000000400000003, 0x0000000200000001 ) ) - #define mm256_shufll_32( v ) \ _mm256_permutevar8x32_epi32( v, \ _mm256_set_epi64x( 0x0000000600000005, 0x0000000400000003, \ 0x0000000200000001, 0x0000000000000007 ) ) - #endif */ @@ -450,21 +502,6 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c ) { return _mm256_alignr_epi8( v, v, c ); } */ -// Same as bit rotation but logically used as byte/word rotation. -#define mm256_swap64_32( v ) mm256_ror_64( v, 32 ) // grandfathered -#define mm256_rev64_32( v ) mm256_ror_64( v, 32 ) - -#define mm256_shuflr64_16(v) _mm256_ror_epi64( v, 16 ) -#define mm256_shufll64_16(v) _mm256_rol_epi64( v, 16 ) - -#define mm256_shuflr64_8(v) _mm256_ror_epi64( v, 8 ) -#define mm256_shufll64_8(v) _mm256_rol_epi64( v, 8 ) - -#define mm256_rev32_16( v ) mm256_ror_32( v, 16 ) - -#define mm256_shuflr32_8(v) _mm256_ror_epi32( v, 8 ) -#define mm256_shufll32_8(v) _mm256_rol_epi32( v, 8 ) - // Reverse byte order in elements, endian bswap. #define mm256_bswap_64( v ) \ _mm256_shuffle_epi8( v, mm256_bcast_m128( _mm_set_epi64x( \ diff --git a/simd-utils/simd-int.h b/simd-utils/simd-int.h index 22d986d8..2a4ce0b5 100644 --- a/simd-utils/simd-int.h +++ b/simd-utils/simd-int.h @@ -1,6 +1,9 @@ #if !defined(SIMD_INT_H__) #define SIMD_INT_H__ 1 +//TODO compile time test for byte order +// be64 etc using HW bowap. 
+// // Endian byte swap #if defined(__x86_64__) @@ -9,8 +12,6 @@ #elif defined(__aarch64__) -//#pragma message "aarch64 fast bswap" - static inline uint64_t bswap_64( uint64_t a ) { uint64_t b; diff --git a/simd-utils/simd-neon.h b/simd-utils/simd-neon.h index 2a7714ea..671a47a3 100644 --- a/simd-utils/simd-neon.h +++ b/simd-utils/simd-neon.h @@ -81,7 +81,7 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 ) #define v128_cmpeq16 vceqq_u16 #define v128_cmpeq8 vceqq_u8 -#define v128_cmpeq0 vceqzq_u64 +#define v128_iszero vceqzq_u64 // Not yet needed //#define v128_cmpeq1 @@ -174,12 +174,31 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 ) // AES -// consistent with Intel AES, break up for optimizing -#define v128_aesenc( v, k ) vaesmcq_u8( vaeseq_u8( v, k ) ) -#define v128_aesenclast( v, k ) vaeseq_u8( v, k ) +// consistent with Intel AES intrinsics, break up for optimizing +#define v128_aesenc( v, k ) \ + v128_xor( k, vaesmcq_u8( vaeseq_u8( v, v128_zero ) ) ) + +#define v128_aesenc_nokey( v ) \ + vaesmcq_u8( vaeseq_u8( v, v128_zero ) ) + +#define v128_aesenclast( v, k ) \ + v128_xor( k, vaeseq_u8( v, v128_zero ) ) + +#define v128_aesenclast_nokey( v, k ) \ + vaeseq_u8( v, v128_zero ) + +#define v128_aesdec( v, k ) \ + v128_xor( k, vaesimcq_u8( vaesdq_u8( v, v128_zero ) ) ) + +#define v128_aesdec_nokey( v, k ) \ + vaesimcq_u8( vaesdq_u8( v, v128_zero ) ) + +#define v128_aesdeclast( v, k ) \ + v128_xor( k, vaesdq_u8( v, v128_zero ) ) + +#define v128_aesdeclast_nokey( v, k ) \ + vaesdq_u8( v, v128_zero ) -#define v128_aesdec( v, k ) vaesimcq_u8( vaesdq_u8( v, k ) ) -#define v128_aesdeclast( v, k ) vaesdq_u8( v, k ) typedef union { @@ -189,7 +208,7 @@ typedef union } __attribute__ ((aligned (16))) v128_ovly; -// Broadcast lane 0 to all lanes +// Broadcast lane 0 to all lanes, consistent with x86_64 broadcast #define v128_bcast64(v) vdupq_laneq_u64( v, 0 ) #define v128_bcast32(v) vdupq_laneq_u32( v, 0 ) #define v128_bcast16(v) 
vdupq_laneq_u16( v, 0 ) diff --git a/sysinfos.c b/sysinfos.c index 313d9b5c..3e5fcc25 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -15,7 +15,7 @@ #include #include "miner.h" -#if defined(__aarch64__) +#if defined(__aarch64__) && !defined(__APPLE__) // for arm's "cpuid" #include #include @@ -141,26 +141,13 @@ static inline void linux_cpu_hilo_freq( float *lo, float *hi ) *lo = (float)lo_freq; } - -#else /* WIN32 */ - -static inline float win32_cputemp( int core ) -{ - // todo - return 0.0; -} - - #endif /* !WIN32 */ -/* exports */ - - static inline float cpu_temp( int core ) { #ifdef WIN32 - return win32_cputemp( core ); + return 0.; #else return linux_cputemp( core ); #endif @@ -321,7 +308,7 @@ static inline void cpuid( unsigned int leaf, unsigned int subleaf, #endif } -#elif defined(__aarch64__) +#elif defined(__aarch64__) && !defined(__APPLE__) static inline void cpuid( unsigned int leaf, unsigned int subleaf, unsigned int output[4] ) @@ -495,11 +482,9 @@ static inline bool cpu_arch_aarch64() static inline bool has_sse() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ EDX_Reg ] & SSE_Flag; - #else return false; #endif @@ -508,11 +493,9 @@ static inline bool has_sse() static inline bool has_sse2() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ EDX_Reg ] & SSE2_Flag; - #else return false; #endif @@ -521,11 +504,9 @@ static inline bool has_sse2() static inline bool has_ssse3() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & SSSE3_Flag; - #else return false; #endif @@ -534,11 +515,9 @@ static inline bool has_ssse3() static inline bool has_sse41() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & SSE41_Flag; - #else return false; #endif @@ -547,11 +526,9 @@ static inline bool has_sse41() static 
inline bool has_sse42() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & SSE42_Flag; - #else return false; #endif @@ -559,7 +536,7 @@ static inline bool has_sse42() static inline bool has_neon() { -#if defined(__aarch64__) +#if defined(__aarch64__) && !defined(__APPLE__) unsigned int cpu_info[4] = { 0 }; return cpu_info[0]; #else @@ -570,7 +547,6 @@ static inline bool has_neon() static inline bool has_aes_ni() { #if defined(__x86_64__) - if ( has_sse2() ) { unsigned int cpu_info[4] = { 0 }; @@ -578,9 +554,7 @@ static inline bool has_aes_ni() return cpu_info[ ECX_Reg ] & AES_NI_Flag; } return false; - -#elif defined(__aarch64__) - +#elif defined(__aarch64__) && !defined(__APPLE__) if ( has_neon() ) { unsigned int cpu_info[4] = { 0 }; @@ -588,7 +562,6 @@ static inline bool has_aes_ni() return cpu_info[0] & HWCAP_AES; } return false; - #else return false; #endif @@ -597,11 +570,9 @@ static inline bool has_aes_ni() static inline bool has_avx() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask ); - #else return false; #endif @@ -610,11 +581,9 @@ static inline bool has_avx() static inline bool has_avx2() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX2_Flag; - #else return false; #endif @@ -623,7 +592,6 @@ static inline bool has_avx2() static inline bool has_sha() { #if defined(__x86_64__) - if ( has_avx() ) { unsigned int cpu_info[4] = { 0 }; @@ -631,9 +599,7 @@ static inline bool has_sha() return cpu_info[ EBX_Reg ] & SHA_Flag; } return false; - -#elif defined(__aarch64__) - +#elif defined(__aarch64__) && !defined(__APPLE__) if ( has_neon() ) { unsigned int cpu_info[4] = { 0 }; @@ -641,7 +607,6 @@ static inline bool has_sha() return cpu_info[0] & HWCAP_SHA2; } return false; - #else return false; #endif @@ -650,7 
+615,6 @@ static inline bool has_sha() static inline bool has_sha512() { #if defined(__x86_64__) - if ( has_avx2() ) { unsigned int cpu_info[4] = { 0 }; @@ -658,9 +622,7 @@ static inline bool has_sha512() return cpu_info[ EAX_Reg ] & SHA512_Flag; } return false; - -#elif defined(__aarch64__) - +#elif defined(__aarch64__) && !defined(__APPLE__) if ( has_neon() ) { unsigned int cpu_info[4] = { 0 }; @@ -668,7 +630,6 @@ static inline bool has_sha512() return cpu_info[0] & HWCAP_SHA3; } return false; - #else return false; #endif @@ -677,7 +638,6 @@ static inline bool has_sha512() static inline bool has_avx512f() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX512_F_Flag; @@ -689,7 +649,6 @@ static inline bool has_avx512f() static inline bool has_avx512dq() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX512_DQ_Flag; @@ -701,7 +660,6 @@ static inline bool has_avx512dq() static inline bool has_avx512bw() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX512_BW_Flag; @@ -713,7 +671,6 @@ static inline bool has_avx512bw() static inline bool has_avx512vl() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX512_VL_Flag; @@ -722,14 +679,13 @@ static inline bool has_avx512vl() #endif } +// baseline for useability static inline bool has_avx512() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask ); - #else return false; #endif @@ -738,7 +694,6 @@ static inline bool has_avx512() static inline bool has_vaes() { #if defined(__x86_64__) - if ( has_avx2() ) { unsigned int cpu_info[4] = { 0 }; @@ -754,11 +709,9 @@ static inline bool 
has_vaes() static inline bool has_vbmi() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ ECX_Reg ] & AVX512_VBMI_Flag; - #else return false; #endif @@ -767,7 +720,6 @@ static inline bool has_vbmi() static inline bool has_vbmi2() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ ECX_Reg ] & AVX512_VBMI2_Flag; @@ -780,7 +732,6 @@ static inline bool has_vbmi2() static inline bool has_xop() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & XOP_Flag; @@ -792,11 +743,9 @@ static inline bool has_xop() static inline bool has_fma3() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( CPU_INFO, 0, cpu_info ); return ( ( cpu_info[ ECX_Reg ] & FMA3_mask ) == FMA3_mask ); - #else return false; #endif @@ -805,24 +754,21 @@ static inline bool has_fma3() static inline bool has_apx_f() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 1, cpu_info ); return cpu_info[ EDX_Reg ] & APX_F_Flag; - #else return false; #endif } +// Not much use on it's own static inline bool has_avx10() { #if defined(__x86_64__) - unsigned int cpu_info[4] = { 0 }; cpuid( EXTENDED_FEATURES, 1, cpu_info ); return cpu_info[ EDX_Reg ] & AVX10_Flag; - #else return false; #endif @@ -831,7 +777,6 @@ static inline bool has_avx10() static inline unsigned int avx10_version() { #if defined(__x86_64__) - if ( has_avx10() ) { unsigned int cpu_info[4] = { 0 }; @@ -839,7 +784,6 @@ static inline unsigned int avx10_version() return cpu_info[ EBX_Reg ] & AVX10_VERSION_mask; } return 0; - #else return 0; #endif @@ -849,7 +793,6 @@ static inline unsigned int avx10_version() static inline bool has_avx10_512() { #if defined(__x86_64__) - if ( has_avx10() ) { unsigned int cpu_info[4] = { 0 }; @@ -857,17 +800,15 @@ static inline bool has_avx10_512() return 
cpu_info[ EBX_Reg ] & AVX10_512_Flag; } return false; - #else return false; #endif } -// may not include 512 +// Includes 128 but may not include 512 static inline bool has_avx10_256() { #if defined(__x86_64__) - if ( has_avx10() ) { unsigned int cpu_info[4] = { 0 }; @@ -875,7 +816,6 @@ static inline bool has_avx10_256() return cpu_info[ EBX_Reg ] & AVX10_256_Flag; } return false; - #else return false; #endif @@ -885,7 +825,6 @@ static inline bool has_avx10_256() static inline unsigned int avx10_vector_length() { #if defined(__x86_64__) - if ( has_avx10() ) { unsigned int cpu_info[4] = { 0 }; @@ -894,16 +833,12 @@ static inline unsigned int avx10_vector_length() : ( cpu_info[ EBX_Reg ] & AVX10_256_Flag ? 256 : 0 ); } return 0; - #else return 0; #endif } - - - static inline uint32_t cpuid_get_highest_function_number() { #if defined(__x86_64__) @@ -922,7 +857,7 @@ static inline void cpuid_get_highest_function( char* s ) { #if defined(__x86_64__) - uint32_t fn = cpuid_get_highest_function_number(); + uint32_t fn = cpuid_get_highest_function_number(); switch (fn) { case 0x16: diff --git a/winbuild-cross.sh b/winbuild-cross.sh index 9f1721e0..a33a6d0f 100755 --- a/winbuild-cross.sh +++ b/winbuild-cross.sh @@ -10,12 +10,14 @@ # define some local variables export LOCAL_LIB="$HOME/usr/lib" -export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/openssl --host=x86_64-w64-mingw32" +export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --host=x86_64-w64-mingw32" +#export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/openssl --host=x86_64-w64-mingw32" export MINGW_LIB="/usr/x86_64-w64-mingw32/lib" # set correct gcc version export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32" # used by GCC -export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl" +export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs" +#export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs 
-L$LOCAL_LIB/openssl" # Support for Windows 7 CPU groups, AES sometimes not included in -march # CPU groups disabled due to incompatibilities between Intel and AMD CPUs. #export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601" @@ -38,7 +40,7 @@ cp $MINGW_LIB/zlib1.dll release/ cp $MINGW_LIB/libwinpthread-1.dll release/ cp $GCC_MINGW_LIB/libstdc++-6.dll release/ cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/ -cp ./../libcrypto-1_1-x64.dll release/ +#cp ./../libcrypto-1_1-x64.dll release/ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/ # Start building...