diff --git a/README.txt b/README.txt index 22428ec1..3776d85e 100644 --- a/README.txt +++ b/README.txt @@ -18,14 +18,14 @@ error to find the fastest one that works. Pay attention to the features listed at cpuminer startup to ensure you are mining at optimum speed using the best available features. -Architecture names and compile options used are only provided for Intel -Core series. Budget CPUs like Pentium and Celeron are often missing some -features. +Architecture names and compile options used are only provided for +mainstream desktop CPUs. Budget CPUs like Pentium and Celeron are often +missing some features. Check your CPU. -AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not -supported by cpuminer-opt due to an incompatible implementation of SSE2 on -these CPUs. Some algos may crash the miner with an invalid instruction. -Users are recommended to use an unoptimized miner such as cpuminer-multi. +Support for AMD CPUs older than Ryzen is incomplete and without specific +recommendations. Find the best fit. CPUs older than Piledriver, including +Athlon x2 and Phenom II x4, are not supported by cpuminer-opt due to an +incompatible implementation of SSE2 on these CPUs. More information for Intel and AMD CPU architectures and their features can be found on Wikipedia. 
@@ -34,26 +34,21 @@ https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures +File name Architecture name -Exe file name Compile flags Arch name +cpuminer-sse2.exe Core2, Nehalem, generic x86_64 with SSE2 +cpuminer-aes-sse42.exe Westmere +cpuminer-avx.exe Sandybridge, Ivybridge +cpuminer-avx2.exe Haswell, Skylake, Kabylake, Coffeelake, Cometlake +cpuminer-avx2-sha.exe AMD Zen1, Zen2 +cpuminer-avx2-sha-vaes.exe Intel Alderlake*, AMD Zen3 +cpuminer-avx512.exe Intel HEDT Skylake-X, Cascadelake +cpuminer-avx512-sha-vaes.exe Icelake, Tigerlake, Rocketlake -cpuminer-sse2.exe "-msse2" Core2, Nehalem -cpuminer-aes-sse42.exe "-march=westmere" Westmere -cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge -cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell(1) -cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake -cpuminer-avx512-sha.exe "-march=cascadelake -msha" Rocketlake(2) -cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake, Tigerlake(3) -cpuminer-zen.exe "-march=znver1" AMD Zen1, Zen2 -cpuminer-zen3.exe "-march=znver2 -mvaes" Zen3(4) - -(1) Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake. -(2) Rocketlake build uses cascadelake+sha as a workaround until Rocketlake - compiler support is avalable. -(3) Icelake & Tigerlake are only available on some laptops. Mining with a - laptop is not recommended. -(4) Zen3 build uses zen2+vaes as a workaround until Zen3 compiler support is - available. Zen2 CPUs should use Zen1 build. +* Alderlake is a hybrid architecture. With the E-cores disabled it may be + possible to enable AVX512 on the P-cores and use the avx512-sha-vaes + build. This is not officially supported by Intel at time of writing. + Check for current information. 
Notes about included DLL files: @@ -66,8 +61,7 @@ https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source Some DLL filess may already be installed on the system by Windows or third party packages. They often will work and may be used instead of the included -file. Without a compelling reason to do so it's recommended to use the included -files as they are packaged. +file. If you like this software feel free to donate: diff --git a/RELEASE_NOTES b/RELEASE_NOTES index ce7752b4..e22c2baa 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,6 +65,18 @@ If not what makes it happen or not happen? Change Log ---------- +v3.19.1 + +Changes to Windows binaries package: + - builds for CPUs with AVX or lower have CPU groups disabled, + - zen3 build renamed to avx2-sha-vaes to support Alderlake as well as Zen3, + - zen build renamed to avx2-sha, supports Zen1 & Zen2, + - avx512-sha build removed, Rocketlake CPUs can use avx512-sha-vaes, + - see README.txt for compatibility details. + +Fixed a few compiler warnings that are new in GCC 11. +Other minor fixes. + v3.19.0 Windows binaries now built with support for CPU groups, requires Windows 7. 
diff --git a/algo-gate-api.h b/algo-gate-api.h index 56594d59..07108021 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -97,7 +97,6 @@ typedef uint32_t set_t; #define SHA_OPT 0x20 // Zen1, Icelake (sha256) #define AVX512_OPT 0x40 // Skylake-X (AVX512[F,VL,DQ,BW]) #define VAES_OPT 0x80 // Icelake (VAES & AVX512) -#define VAES256_OPT 0x100 // Zen3 (VAES without AVX512) // return set containing all elements from sets a & b diff --git a/algo/blake/decred-gate.c b/algo/blake/decred-gate.c index 0a90de7f..9c58b21b 100644 --- a/algo/blake/decred-gate.c +++ b/algo/blake/decred-gate.c @@ -8,7 +8,7 @@ uint32_t *decred_get_nonceptr( uint32_t *work_data ) return &work_data[ DECRED_NONCE_INDEX ]; } -double decred_calc_network_diff( struct work* work ) +long double decred_calc_network_diff( struct work* work ) { // sample for diff 43.281 : 1c05ea29 // todo: endian reversed on longpoll could be zr5 specific... @@ -16,7 +16,7 @@ double decred_calc_network_diff( struct work* work ) uint32_t bits = ( nbits & 0xffffff ); int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28 int m; - double d = (double)0x0000ffff / (double)bits; + long double d = (long double)0x0000ffff / (long double)bits; for ( m = shift; m < 29; m++ ) d *= 256.0; @@ -25,7 +25,7 @@ double decred_calc_network_diff( struct work* work ) if ( shift == 28 ) d *= 256.0; // testnet if ( opt_debug_diff ) - applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, + applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", (double)d, shift, bits ); return net_diff; } diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c index c1d70e7d..8804c41c 100644 --- a/algo/lyra2/lyra2-gate.c +++ b/algo/lyra2/lyra2-gate.c @@ -188,7 +188,7 @@ bool register_allium_algo( algo_gate_t* gate ) gate->hash = (void*)&allium_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT - | VAES_OPT | VAES256_OPT; + | VAES_OPT; opt_target_factor = 256.0; return true; }; diff --git a/algo/ripemd/lbry-gate.c 
b/algo/ripemd/lbry-gate.c index ba38c651..d962f501 100644 --- a/algo/ripemd/lbry-gate.c +++ b/algo/ripemd/lbry-gate.c @@ -4,7 +4,7 @@ #include #include -double lbry_calc_network_diff( struct work *work ) +long double lbry_calc_network_diff( struct work *work ) { // sample for diff 43.281 : 1c05ea29 // todo: endian reversed on longpoll could be zr5 specific... @@ -12,7 +12,7 @@ double lbry_calc_network_diff( struct work *work ) uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] ); uint32_t bits = (nbits & 0xffffff); int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28 - double d = (double)0x0000ffff / (double)bits; + long double d = (long double)0x0000ffff / (long double)bits; for (int m=shift; m < 29; m++) d *= 256.0; for (int m=29; m < shift; m++) d /= 256.0; diff --git a/algo/ripemd/sph_ripemd.c b/algo/ripemd/sph_ripemd.c index f2954971..9273fb8d 100644 --- a/algo/ripemd/sph_ripemd.c +++ b/algo/ripemd/sph_ripemd.c @@ -479,7 +479,7 @@ sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]) * One round of RIPEMD-128. The data must be aligned for 32-bit access. */ static void -ripemd128_round(const unsigned char *data, sph_u32 r[5]) +ripemd128_round(const unsigned char *data, sph_u32 r[4]) { #if SPH_LITTLE_FAST diff --git a/algo/swifftx/swifftx-4way.c b/algo/swifftx/swifftx-4way.c deleted file mode 100644 index cc003f02..00000000 --- a/algo/swifftx/swifftx-4way.c +++ /dev/null @@ -1,912 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////// -// -// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION -// -// SWIFFTX.c -// -// October 2008 -// -// This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function. -// SWIFFTX is a candidate function for SHA-3 NIST competition. -// More details about SWIFFTX can be found in the accompanying submission documents. 
-// -/////////////////////////////////////////////////////////////////////////////////////////////// -#include "swifftx.h" -// See the remarks concerning compatibility issues inside stdint.h. -#include "stdint.h" -// Remove this while using gcc: -//#include "stdbool.h" -#include - -/////////////////////////////////////////////////////////////////////////////////////////////// -// Constants and static tables portion. -/////////////////////////////////////////////////////////////////////////////////////////////// - -// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo -// this number. -#define FIELD_SIZE 257 - -// The size of FFT we use: -#define N 64 - -#define LOGN 6 - -#define EIGHTH_N (N / 8) - -// The number of FFTS done on the input. -#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8) // 32 - -// Omega is the 128th root of unity in Z_257. -// We choose w = 42. -#define OMEGA 42 - -// The size of the inner FFT lookup table: -#define W 8 - -// Calculates the sum and the difference of two numbers. -// -// Parameters: -// - A: the first operand. After the operation stores the sum of the two operands. -// - B: the second operand. After the operation stores the difference between the first and the -// second operands. -#define ADD_SUB_4WAY( A, B ) \ -{ \ - __m128i temp = B; \ - B = _mm_sub_epi32( A, B ); \ - A = _mm_add_epi32( A, temp ); \ -} - - -//#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));} - -// Quickly reduces an integer modulo 257. -// -// Parameters: -// - A: the input. - -#define Q_REDUCE( A ) ( _mm_sub_epi32( \ - _mm_and_epi32( A, m128_const1_32( 0xff ) ), \ - _mm_srli_epi32( A, 8 ) ) ) - -//#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8)) - -// Since we need to do the setup only once, this is the indicator variable: -static bool wasSetupDone = false; - -// This array stores the powers of omegas that correspond to the indices, which are the input -// values. 
Known also as the "outer FFT twiddle factors". -swift_int16_t multipliers[N]; - -// This array stores the powers of omegas, multiplied by the corresponding values. -// We store this table to save computation time. -// -// To calculate the intermediate value of the compression function (the first out of two -// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the -// compression function, i is between 0 and 31, x_i is a 64-bit value. -// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper -- -// formula (2), section 3, page 6. -swift_int16_t fftTable[256 * EIGHTH_N]; - -// The A's we use in SWIFFTX shall be random elements of Z_257. -// We generated these A's from the decimal expansion of PI as follows: we converted each -// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A -// element, otherwise move to the next triple of digits in the expansion. This guarntees that -// the A's are random, provided that PI digits are. 
-const swift_int16_t As[3 * M * N] = -{141, 78, 139, 75, 238, 205, 129, 126, 22, 245, 197, 169, 142, 118, 105, 78, - 50, 149, 29, 208, 114, 34, 85, 117, 67, 148, 86, 256, 25, 49, 133, 93, - 95, 36, 68, 231, 211, 102, 151, 128, 224, 117, 193, 27, 102, 187, 7, 105, - 45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165, 14, 201, 145, 134, - 52, 203, 91, 96, 197, 69, 134, 213, 136, 93, 3, 249, 141, 16, 210, 73, - 6, 92, 58, 74, 174, 6, 254, 91, 201, 107, 110, 76, 103, 11, 73, 16, - 34, 209, 7, 127, 146, 254, 95, 176, 57, 13, 108, 245, 77, 92, 186, 117, - 124, 97, 105, 118, 34, 74, 205, 122, 235, 53, 94, 238, 210, 227, 183, 11, - 129, 159, 105, 183, 142, 129, 86, 21, 137, 138, 224, 223, 190, 188, 179, 188, - 256, 25, 217, 176, 36, 176, 238, 127, 160, 210, 155, 148, 132, 0, 54, 127, - 145, 6, 46, 85, 243, 95, 173, 123, 178, 207, 211, 183, 224, 173, 146, 35, - 71, 114, 50, 22, 175, 1, 28, 19, 112, 129, 21, 34, 161, 159, 115, 52, - 4, 193, 211, 92, 115, 49, 59, 217, 218, 96, 61, 81, 24, 202, 198, 89, - 45, 128, 8, 51, 253, 87, 171, 35, 4, 188, 171, 10, 3, 137, 238, 73, - 19, 208, 124, 163, 103, 177, 155, 147, 46, 84, 253, 233, 171, 241, 211, 217, - 159, 48, 96, 79, 237, 18, 171, 226, 99, 1, 97, 195, 216, 163, 198, 95, - 0, 201, 65, 228, 21, 153, 124, 230, 44, 35, 44, 108, 85, 156, 249, 207, - 26, 222, 131, 1, 60, 242, 197, 150, 181, 19, 116, 213, 75, 98, 124, 240, - 123, 207, 62, 255, 60, 143, 187, 157, 139, 9, 12, 104, 89, 49, 193, 146, - 104, 196, 181, 82, 198, 253, 192, 191, 255, 122, 212, 104, 47, 20, 132, 208, - 46, 170, 2, 69, 234, 36, 56, 163, 28, 152, 104, 238, 162, 56, 24, 58, - 38, 150, 193, 254, 253, 125, 173, 35, 73, 126, 247, 239, 216, 6, 199, 15, - 90, 12, 97, 122, 9, 84, 207, 127, 219, 72, 58, 30, 29, 182, 41, 192, - 235, 248, 237, 74, 72, 176, 210, 252, 45, 64, 165, 87, 202, 241, 236, 223, - 151, 242, 119, 239, 52, 112, 169, 28, 13, 37, 160, 60, 158, 81, 133, 60, - 16, 145, 249, 192, 173, 217, 214, 93, 141, 184, 54, 34, 161, 104, 157, 95, - 38, 133, 
218, 227, 211, 181, 9, 66, 137, 143, 77, 33, 248, 159, 4, 55, - 228, 48, 99, 219, 222, 184, 15, 36, 254, 256, 157, 237, 87, 139, 209, 113, - 232, 85, 126, 167, 197, 100, 103, 166, 64, 225, 125, 205, 117, 135, 84, 128, - 231, 112, 90, 241, 28, 22, 210, 147, 186, 49, 230, 21, 108, 39, 194, 47, - 123, 199, 107, 114, 30, 210, 250, 143, 59, 156, 131, 133, 221, 27, 76, 99, - 208, 250, 78, 12, 211, 141, 95, 81, 195, 106, 8, 232, 150, 212, 205, 221, - 11, 225, 87, 219, 126, 136, 137, 180, 198, 48, 68, 203, 239, 252, 194, 235, - 142, 137, 174, 172, 190, 145, 250, 221, 182, 204, 1, 195, 130, 153, 83, 241, - 161, 239, 211, 138, 11, 169, 155, 245, 174, 49, 10, 166, 16, 130, 181, 139, - 222, 222, 112, 99, 124, 94, 51, 243, 133, 194, 244, 136, 35, 248, 201, 177, - 178, 186, 129, 102, 89, 184, 180, 41, 149, 96, 165, 72, 225, 231, 134, 158, - 199, 28, 249, 16, 225, 195, 10, 210, 164, 252, 138, 8, 35, 152, 213, 199, - 82, 116, 97, 230, 63, 199, 241, 35, 79, 120, 54, 174, 67, 112, 1, 76, - 69, 222, 194, 96, 82, 94, 25, 228, 196, 145, 155, 136, 228, 234, 46, 101, - 246, 51, 103, 166, 246, 75, 9, 200, 161, 4, 108, 35, 129, 168, 208, 144, - 50, 14, 13, 220, 41, 132, 122, 127, 194, 9, 232, 234, 107, 28, 187, 8, - 51, 141, 97, 221, 225, 9, 113, 170, 166, 102, 135, 22, 231, 185, 227, 187, - 110, 145, 251, 146, 76, 22, 146, 228, 7, 53, 64, 25, 62, 198, 130, 190, - 221, 232, 169, 64, 188, 199, 237, 249, 173, 218, 196, 191, 48, 224, 5, 113, - 100, 166, 160, 21, 191, 197, 61, 162, 149, 171, 240, 183, 129, 231, 123, 204, - 192, 179, 134, 15, 47, 161, 142, 177, 239, 234, 186, 237, 231, 53, 208, 95, - 146, 36, 225, 231, 89, 142, 93, 248, 137, 124, 83, 39, 69, 77, 89, 208, - 182, 48, 85, 147, 244, 164, 246, 68, 38, 190, 220, 35, 202, 91, 157, 151, - 201, 240, 185, 218, 4, 152, 2, 132, 177, 88, 190, 196, 229, 74, 220, 135, - 137, 196, 11, 47, 5, 251, 106, 144, 163, 60, 222, 127, 52, 57, 202, 102, - 64, 140, 110, 206, 23, 182, 39, 245, 1, 163, 157, 186, 163, 80, 7, 230, - 44, 249, 176, 102, 164, 
125, 147, 120, 18, 191, 186, 125, 64, 65, 198, 157, - 164, 213, 95, 61, 13, 181, 208, 91, 242, 197, 158, 34, 98, 169, 91, 14, - 17, 93, 157, 17, 65, 30, 183, 6, 139, 58, 255, 108, 100, 136, 209, 144, - 164, 6, 237, 33, 210, 110, 57, 126, 197, 136, 125, 244, 165, 151, 168, 3, - 143, 251, 247, 155, 136, 130, 88, 14, 74, 121, 250, 133, 21, 226, 185, 232, - 118, 132, 89, 64, 204, 161, 2, 70, 224, 159, 35, 204, 123, 180, 13, 52, - 231, 57, 25, 78, 66, 69, 97, 42, 198, 84, 176, 59, 8, 232, 125, 134, - 193, 2, 232, 109, 216, 69, 90, 142, 32, 38, 249, 37, 75, 180, 184, 188, - 19, 47, 120, 87, 146, 70, 232, 120, 191, 45, 33, 38, 19, 248, 110, 110, - 44, 64, 2, 84, 244, 228, 252, 228, 170, 123, 38, 144, 213, 144, 171, 212, - 243, 87, 189, 46, 128, 110, 84, 77, 65, 183, 61, 184, 101, 44, 168, 68, - 14, 106, 105, 8, 227, 211, 166, 39, 152, 43, 52, 254, 197, 55, 119, 89, - 168, 65, 53, 138, 177, 56, 219, 0, 58, 121, 148, 18, 44, 100, 215, 103, - 145, 229, 117, 196, 91, 89, 113, 143, 172, 239, 249, 184, 154, 39, 112, 65, - 204, 42, 84, 38, 155, 151, 151, 16, 100, 87, 174, 162, 145, 147, 149, 186, - 237, 145, 134, 144, 198, 235, 213, 163, 48, 230, 24, 47, 57, 71, 127, 0, - 150, 219, 12, 81, 197, 150, 131, 13, 169, 63, 175, 184, 48, 235, 65, 243, - 149, 200, 163, 254, 202, 114, 247, 67, 143, 250, 126, 228, 80, 130, 216, 214, - 36, 2, 230, 33, 119, 125, 3, 142, 237, 100, 3, 152, 197, 174, 244, 129, - 232, 30, 206, 199, 39, 210, 220, 43, 237, 221, 201, 54, 179, 42, 28, 133, - 246, 203, 198, 177, 0, 28, 194, 85, 223, 109, 155, 147, 221, 60, 133, 108, - 157, 254, 26, 75, 157, 185, 49, 142, 31, 137, 71, 43, 63, 64, 237, 148, - 237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140, 62, 109, 136, 39, - 255, 8, 158, 146, 128, 49, 222, 96, 57, 209, 180, 249, 202, 127, 113, 231, - 78, 178, 46, 33, 228, 215, 104, 31, 207, 186, 82, 41, 42, 39, 103, 119, - 123, 133, 243, 254, 238, 156, 90, 186, 37, 212, 33, 107, 252, 51, 177, 36, - 237, 76, 159, 245, 93, 214, 97, 56, 190, 38, 160, 94, 
105, 222, 220, 158, - 49, 16, 191, 52, 120, 87, 179, 2, 27, 144, 223, 230, 184, 6, 129, 227, - 69, 47, 215, 181, 162, 139, 72, 200, 45, 163, 159, 62, 2, 221, 124, 40, - 159, 242, 35, 208, 179, 166, 98, 67, 178, 68, 143, 225, 178, 146, 187, 159, - 57, 66, 176, 192, 236, 250, 168, 224, 122, 43, 159, 120, 133, 165, 122, 64, - 87, 74, 161, 241, 9, 87, 90, 24, 255, 113, 203, 220, 57, 139, 197, 159, - 31, 151, 27, 140, 77, 162, 7, 27, 84, 228, 187, 220, 53, 126, 162, 242, - 84, 181, 223, 103, 86, 177, 207, 31, 140, 18, 207, 256, 201, 166, 96, 23, - 233, 103, 197, 84, 161, 75, 59, 149, 138, 154, 119, 92, 16, 53, 116, 97, - 220, 114, 35, 45, 77, 209, 40, 196, 71, 22, 81, 178, 110, 14, 3, 180, - 110, 129, 112, 47, 18, 61, 134, 78, 73, 79, 254, 232, 125, 180, 205, 54, - 220, 119, 63, 89, 181, 52, 77, 109, 151, 77, 80, 207, 144, 25, 20, 6, - 208, 47, 201, 206, 192, 14, 73, 176, 256, 201, 207, 87, 216, 60, 56, 73, - 92, 243, 179, 113, 49, 59, 55, 168, 121, 137, 69, 154, 95, 57, 187, 47, - 129, 4, 15, 92, 6, 116, 69, 196, 48, 134, 84, 81, 111, 56, 38, 176, - 239, 6, 128, 72, 242, 134, 36, 221, 59, 48, 242, 68, 130, 110, 171, 89, - 13, 220, 48, 29, 5, 75, 104, 233, 91, 129, 105, 162, 44, 113, 163, 163, - 85, 147, 190, 111, 197, 80, 213, 153, 81, 68, 203, 33, 161, 165, 10, 61, - 120, 252, 0, 205, 28, 42, 193, 64, 39, 37, 83, 175, 5, 218, 215, 174, - 128, 121, 231, 11, 150, 145, 135, 197, 136, 91, 193, 5, 107, 88, 82, 6, - 4, 188, 256, 70, 40, 2, 167, 57, 169, 203, 115, 254, 215, 172, 84, 80, - 188, 167, 34, 137, 43, 243, 2, 79, 178, 38, 188, 135, 233, 194, 208, 13, - 11, 151, 231, 196, 12, 122, 162, 56, 17, 114, 191, 207, 90, 132, 64, 238, - 187, 6, 198, 176, 240, 88, 118, 236, 15, 226, 166, 22, 193, 229, 82, 246, - 213, 64, 37, 63, 31, 243, 252, 37, 156, 38, 175, 204, 138, 141, 211, 82, - 106, 217, 97, 139, 153, 56, 129, 218, 158, 9, 83, 26, 87, 112, 71, 21, - 250, 5, 65, 141, 68, 116, 231, 113, 10, 218, 99, 205, 201, 92, 157, 4, - 97, 46, 49, 220, 72, 139, 103, 171, 149, 129, 
193, 19, 69, 245, 43, 31, - 58, 68, 36, 195, 159, 22, 54, 34, 233, 141, 205, 100, 226, 96, 22, 192, - 41, 231, 24, 79, 234, 138, 30, 120, 117, 216, 172, 197, 172, 107, 86, 29, - 181, 151, 0, 6, 146, 186, 68, 55, 54, 58, 213, 182, 60, 231, 33, 232, - 77, 210, 216, 154, 80, 51, 141, 122, 68, 148, 219, 122, 254, 48, 64, 175, - 41, 115, 62, 243, 141, 81, 119, 121, 5, 68, 121, 88, 239, 29, 230, 90, - 135, 159, 35, 223, 168, 112, 49, 37, 146, 60, 126, 134, 42, 145, 115, 90, - 73, 133, 211, 86, 120, 141, 122, 241, 127, 56, 130, 36, 174, 75, 83, 246, - 112, 45, 136, 194, 201, 115, 1, 156, 114, 167, 208, 12, 176, 147, 32, 170, - 251, 100, 102, 220, 122, 210, 6, 49, 75, 201, 38, 105, 132, 135, 126, 102, - 13, 121, 76, 228, 202, 20, 61, 213, 246, 13, 207, 42, 148, 168, 37, 253, - 34, 94, 141, 185, 18, 234, 157, 109, 104, 64, 250, 125, 49, 236, 86, 48, - 196, 77, 75, 237, 156, 103, 225, 19, 110, 229, 22, 68, 177, 93, 221, 181, - 152, 153, 61, 108, 101, 74, 247, 195, 127, 216, 30, 166, 168, 61, 83, 229, - 120, 156, 96, 120, 201, 124, 43, 27, 253, 250, 120, 143, 89, 235, 189, 243, - 150, 7, 127, 119, 149, 244, 84, 185, 134, 34, 128, 193, 236, 234, 132, 117, - 137, 32, 145, 184, 44, 121, 51, 76, 11, 228, 142, 251, 39, 77, 228, 251, - 41, 58, 246, 107, 125, 187, 9, 240, 35, 8, 11, 162, 242, 220, 158, 163, - 2, 184, 163, 227, 242, 2, 100, 101, 2, 78, 129, 34, 89, 28, 26, 157, - 79, 31, 107, 250, 194, 156, 186, 69, 212, 66, 41, 180, 139, 42, 211, 253, - 256, 239, 29, 129, 104, 248, 182, 68, 1, 189, 48, 226, 36, 229, 3, 158, - 41, 53, 241, 22, 115, 174, 16, 163, 224, 19, 112, 219, 177, 233, 42, 27, - 250, 134, 18, 28, 145, 122, 68, 34, 134, 31, 147, 17, 39, 188, 150, 76, - 45, 42, 167, 249, 12, 16, 23, 182, 13, 79, 121, 3, 70, 197, 239, 44, - 86, 177, 255, 81, 64, 171, 138, 131, 73, 110, 44, 201, 254, 198, 146, 91, - 48, 9, 104, 31, 29, 161, 101, 31, 138, 180, 231, 233, 79, 137, 61, 236, - 140, 15, 249, 218, 234, 119, 99, 195, 110, 137, 237, 207, 8, 31, 45, 24, - 90, 155, 203, 253, 
192, 203, 65, 176, 210, 171, 142, 214, 220, 122, 136, 237, - 189, 186, 147, 40, 80, 254, 173, 33, 191, 46, 192, 26, 108, 255, 228, 205, - 61, 76, 39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221, 8, - 185, 85, 60, 233, 147, 244, 87, 137, 8, 140, 96, 80, 53, 45, 175, 160, - 124, 189, 112, 37, 144, 19, 70, 17, 170, 242, 2, 3, 28, 95, 120, 199, - 212, 43, 9, 117, 86, 151, 101, 241, 200, 145, 241, 19, 178, 69, 204, 197, - 227, 166, 94, 7, 193, 45, 247, 234, 19, 187, 212, 212, 236, 125, 33, 95, - 198, 121, 122, 103, 77, 155, 235, 49, 25, 237, 249, 11, 162, 7, 238, 24, - 16, 150, 129, 25, 152, 17, 42, 67, 247, 162, 77, 154, 31, 133, 55, 137, - 79, 119, 153, 10, 86, 28, 244, 186, 41, 169, 106, 44, 10, 49, 110, 179, - 32, 133, 155, 244, 61, 70, 131, 168, 170, 39, 231, 252, 32, 69, 92, 238, - 239, 35, 132, 136, 236, 167, 90, 32, 123, 88, 69, 22, 20, 89, 145, 166, - 30, 118, 75, 4, 49, 31, 225, 54, 11, 50, 56, 191, 246, 1, 187, 33, - 119, 107, 139, 68, 19, 240, 131, 55, 94, 113, 31, 252, 12, 179, 121, 2, - 120, 252, 0, 76, 41, 80, 185, 42, 62, 121, 105, 159, 121, 109, 111, 98, - 7, 118, 86, 29, 210, 70, 231, 179, 223, 229, 164, 70, 62, 47, 0, 206, - 204, 178, 168, 120, 224, 166, 99, 25, 103, 63, 246, 224, 117, 204, 75, 124, - 140, 133, 110, 110, 222, 88, 151, 118, 46, 37, 22, 143, 158, 40, 2, 50, - 153, 94, 190, 199, 13, 198, 127, 211, 180, 90, 183, 98, 0, 142, 210, 154, - 100, 187, 67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247, 14, 121, 221, - 57, 88, 253, 243, 185, 89, 45, 249, 221, 194, 108, 175, 193, 119, 50, 141, - 223, 133, 136, 64, 176, 250, 129, 100, 124, 94, 181, 159, 99, 185, 177, 240, - 135, 42, 103, 52, 202, 208, 143, 186, 193, 103, 154, 237, 102, 88, 225, 161, - 50, 188, 191, 109, 12, 87, 19, 227, 247, 183, 13, 52, 205, 170, 205, 146, - 89, 160, 18, 105, 192, 73, 231, 225, 184, 157, 252, 220, 61, 59, 169, 183, - 221, 20, 141, 20, 158, 101, 245, 7, 245, 225, 118, 137, 84, 55, 19, 27, - 164, 110, 35, 25, 202, 94, 150, 46, 91, 152, 130, 1, 7, 
46, 16, 237, - 171, 109, 19, 200, 65, 38, 10, 213, 70, 96, 126, 226, 185, 225, 181, 46, - 10, 165, 11, 123, 53, 158, 22, 147, 64, 22, 227, 69, 182, 237, 197, 37, - 39, 49, 186, 223, 139, 128, 55, 36, 166, 178, 220, 20, 98, 172, 166, 253, - 45, 0, 120, 180, 189, 185, 158, 159, 196, 6, 214, 79, 141, 52, 156, 107, - 5, 109, 142, 159, 33, 64, 190, 133, 95, 132, 95, 202, 160, 63, 186, 23, - 231, 107, 163, 33, 234, 15, 244, 77, 108, 49, 51, 7, 164, 87, 142, 99, - 240, 202, 47, 256, 118, 190, 196, 178, 217, 42, 39, 153, 21, 192, 232, 202, - 14, 82, 179, 64, 233, 4, 219, 10, 133, 78, 43, 144, 146, 216, 202, 81, - 71, 252, 8, 201, 68, 256, 85, 233, 164, 88, 176, 30, 5, 152, 126, 179, - 249, 84, 140, 190, 159, 54, 118, 98, 2, 159, 27, 133, 74, 121, 239, 196, - 71, 149, 119, 135, 102, 20, 87, 112, 44, 75, 221, 3, 151, 158, 5, 98, - 152, 25, 97, 106, 63, 171, 240, 79, 234, 240, 230, 92, 76, 70, 173, 196, - 36, 225, 218, 133, 64, 240, 150, 41, 146, 66, 133, 51, 134, 73, 170, 238, - 140, 90, 45, 89, 46, 147, 96, 169, 174, 174, 244, 151, 90, 40, 32, 74, - 38, 154, 246, 57, 31, 14, 189, 151, 83, 243, 197, 183, 220, 185, 53, 225, - 51, 106, 188, 208, 222, 248, 93, 13, 93, 215, 131, 25, 142, 185, 113, 222, - 131, 215, 149, 50, 159, 85, 32, 5, 205, 192, 2, 227, 42, 214, 197, 42, - 126, 182, 68, 123, 109, 36, 237, 179, 170, 199, 77, 256, 5, 128, 214, 243, - 137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198, 37, 192, 80, - 121, 221, 246, 1, 16, 246, 29, 78, 64, 148, 124, 38, 96, 125, 28, 20, - 48, 51, 73, 187, 139, 208, 98, 253, 221, 188, 84, 129, 1, 205, 95, 205, - 117, 79, 71, 126, 134, 237, 19, 184, 137, 125, 129, 178, 223, 54, 188, 112, - 30, 7, 225, 228, 205, 184, 233, 87, 117, 22, 58, 10, 8, 42, 2, 114, - 254, 19, 17, 13, 150, 92, 233, 179, 63, 12, 60, 171, 127, 35, 50, 5, - 195, 113, 241, 25, 249, 184, 166, 44, 221, 35, 151, 116, 8, 54, 195, 89, - 218, 186, 132, 5, 41, 89, 226, 177, 11, 41, 87, 172, 5, 23, 20, 59, - 228, 94, 76, 33, 137, 43, 151, 221, 61, 232, 4, 
120, 93, 217, 80, 228, - 228, 6, 58, 25, 62, 84, 91, 48, 209, 20, 247, 243, 55, 106, 80, 79, - 235, 34, 20, 180, 146, 2, 236, 13, 236, 206, 243, 222, 204, 83, 148, 213, - 214, 117, 237, 98, 0, 90, 204, 168, 32, 41, 126, 67, 191, 74, 27, 255, - 26, 75, 240, 113, 185, 105, 167, 154, 112, 67, 151, 63, 161, 134, 239, 176, - 42, 87, 249, 130, 45, 242, 17, 100, 107, 120, 212, 218, 237, 76, 231, 162, - 175, 172, 118, 155, 92, 36, 124, 17, 121, 71, 13, 9, 82, 126, 147, 142, - 218, 148, 138, 80, 163, 106, 164, 123, 140, 129, 35, 42, 186, 154, 228, 214, - 75, 73, 8, 253, 42, 153, 232, 164, 95, 24, 110, 90, 231, 197, 90, 196, - 57, 164, 252, 181, 31, 7, 97, 256, 35, 77, 200, 212, 99, 179, 92, 227, - 17, 180, 49, 176, 9, 188, 13, 182, 93, 44, 128, 219, 134, 92, 151, 6, - 23, 126, 200, 109, 66, 30, 140, 180, 146, 134, 67, 200, 7, 9, 223, 168, - 186, 221, 3, 154, 150, 165, 43, 53, 138, 27, 86, 213, 235, 160, 70, 2, - 240, 20, 89, 212, 84, 141, 168, 246, 183, 227, 30, 167, 138, 185, 253, 83, - 52, 143, 236, 94, 59, 65, 89, 218, 194, 157, 164, 156, 111, 95, 202, 168, - 245, 256, 151, 28, 222, 194, 72, 130, 217, 134, 253, 77, 246, 100, 76, 32, - 254, 174, 182, 193, 14, 237, 74, 1, 74, 26, 135, 216, 152, 208, 112, 38, - 181, 62, 25, 71, 61, 234, 254, 97, 191, 23, 92, 256, 190, 205, 6, 16, - 134, 147, 210, 219, 148, 59, 73, 185, 24, 247, 174, 143, 116, 220, 128, 144, - 111, 126, 101, 98, 130, 136, 101, 102, 69, 127, 24, 168, 146, 226, 226, 207, - 176, 122, 149, 254, 134, 196, 22, 151, 197, 21, 50, 205, 116, 154, 65, 116, - 177, 224, 127, 77, 177, 159, 225, 69, 176, 54, 100, 104, 140, 8, 11, 126, - 11, 188, 185, 159, 107, 16, 254, 142, 80, 28, 5, 157, 104, 57, 109, 82, - 102, 80, 173, 242, 238, 207, 57, 105, 237, 160, 59, 189, 189, 199, 26, 11, - 190, 156, 97, 118, 20, 12, 254, 189, 165, 147, 142, 199, 5, 213, 64, 133, - 108, 217, 133, 60, 94, 28, 116, 136, 47, 165, 125, 42, 183, 143, 14, 129, - 223, 70, 212, 205, 181, 180, 3, 201, 182, 46, 57, 104, 239, 60, 99, 181, - 220, 231, 45, 
79, 156, 89, 149, 143, 190, 103, 153, 61, 235, 73, 136, 20, - 89, 243, 16, 130, 247, 141, 134, 93, 80, 68, 85, 84, 8, 72, 194, 4, - 242, 110, 19, 133, 199, 70, 172, 92, 132, 254, 67, 74, 36, 94, 13, 90, - 154, 184, 9, 109, 118, 243, 214, 71, 36, 95, 0, 90, 201, 105, 112, 215, - 69, 196, 224, 210, 236, 242, 155, 211, 37, 134, 69, 113, 157, 97, 68, 26, - 230, 149, 219, 180, 20, 76, 172, 145, 154, 40, 129, 8, 93, 56, 162, 124, - 207, 233, 105, 19, 3, 183, 155, 134, 8, 244, 213, 78, 139, 88, 156, 37, - 51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133, 24, 136, 153, 5, 90, - 210, 197, 216, 24, 131, 17, 147, 246, 13, 86, 3, 253, 179, 237, 101, 114, - 243, 191, 207, 2, 220, 133, 244, 53, 87, 125, 154, 158, 197, 20, 8, 83, - 32, 191, 38, 241, 204, 22, 168, 59, 217, 123, 162, 82, 21, 50, 130, 89, - 239, 253, 195, 56, 253, 74, 147, 125, 234, 199, 250, 28, 65, 193, 22, 237, - 193, 94, 58, 229, 139, 176, 69, 42, 179, 164, 150, 168, 246, 214, 86, 174, - 59, 117, 15, 19, 76, 37, 214, 238, 153, 226, 154, 45, 109, 114, 198, 107, - 45, 70, 238, 196, 142, 252, 244, 71, 123, 136, 134, 188, 99, 132, 25, 42, - 240, 0, 196, 33, 26, 124, 256, 145, 27, 102, 153, 35, 28, 132, 221, 167, - 138, 133, 41, 170, 95, 224, 40, 139, 239, 153, 1, 106, 255, 106, 170, 163, - 127, 44, 155, 232, 194, 119, 232, 117, 239, 143, 108, 41, 3, 9, 180, 256, - 144, 113, 133, 200, 79, 69, 128, 216, 31, 50, 102, 209, 249, 136, 150, 154, - 182, 51, 228, 39, 127, 142, 87, 15, 94, 92, 187, 245, 31, 236, 64, 58, - 114, 11, 17, 166, 189, 152, 218, 34, 123, 39, 58, 37, 153, 91, 63, 121, - 31, 34, 12, 254, 106, 96, 171, 14, 155, 247, 214, 69, 24, 98, 3, 204, - 202, 194, 207, 30, 253, 44, 119, 70, 14, 96, 82, 250, 63, 6, 232, 38, - 89, 144, 102, 191, 82, 254, 20, 222, 96, 162, 110, 6, 159, 58, 200, 226, - 98, 128, 42, 70, 84, 247, 128, 211, 136, 54, 143, 166, 60, 118, 99, 218, - 27, 193, 85, 81, 219, 223, 46, 41, 23, 233, 152, 222, 36, 236, 54, 181, - 56, 50, 4, 207, 129, 92, 78, 88, 197, 251, 131, 105, 31, 172, 38, 
131, - 19, 204, 129, 47, 227, 106, 202, 183, 23, 6, 77, 224, 102, 147, 11, 218, - 131, 132, 60, 192, 208, 223, 236, 23, 103, 115, 89, 18, 185, 171, 70, 174, - 139, 0, 100, 160, 221, 11, 228, 60, 12, 122, 114, 12, 157, 235, 148, 57, - 83, 62, 173, 131, 169, 126, 85, 99, 93, 243, 81, 80, 29, 245, 206, 82, - 236, 227, 166, 14, 230, 213, 144, 97, 27, 111, 99, 164, 105, 150, 89, 111, - 252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224, 33, 134, 118, 186, 80, - 159, 2, 186, 193, 54, 242, 25, 237, 232, 249, 226, 213, 90, 149, 90, 160, - 118, 69, 64, 37, 10, 183, 109, 246, 30, 52, 219, 69, 189, 26, 116, 220, - 50, 244, 243, 243, 139, 137, 232, 98, 38, 45, 256, 143, 171, 101, 73, 238, - 123, 45, 194, 167, 250, 123, 12, 29, 136, 237, 141, 21, 89, 96, 199, 44, - 8, 214, 208, 17, 113, 41, 137, 26, 166, 155, 89, 85, 54, 58, 97, 160, - 50, 239, 58, 71, 21, 157, 139, 12, 37, 198, 182, 131, 149, 134, 16, 204, - 164, 181, 248, 166, 52, 216, 136, 201, 37, 255, 187, 240, 5, 101, 147, 231, - 14, 163, 253, 134, 146, 216, 8, 54, 224, 90, 220, 195, 75, 215, 186, 58, - 71, 204, 124, 105, 239, 53, 16, 85, 69, 163, 195, 223, 33, 38, 69, 88, - 88, 203, 99, 55, 176, 13, 156, 204, 236, 99, 194, 134, 75, 247, 126, 129, - 160, 124, 233, 206, 139, 144, 154, 45, 233, 51, 206, 61, 60, 55, 205, 107, - 84, 108, 96, 188, 203, 31, 89, 20, 115, 144, 137, 90, 237, 78, 231, 185, - 120, 217, 1, 176, 169, 30, 155, 176, 100, 113, 53, 42, 193, 108, 14, 121, - 176, 158, 137, 92, 178, 44, 110, 249, 108, 234, 94, 101, 128, 12, 250, 173, - 72, 202, 232, 66, 139, 152, 189, 18, 32, 197, 9, 238, 246, 55, 119, 183, - 196, 119, 113, 247, 191, 100, 200, 245, 46, 16, 234, 112, 136, 116, 232, 48, - 176, 108, 11, 237, 14, 153, 93, 177, 124, 72, 67, 121, 135, 143, 45, 18, - 97, 251, 184, 172, 136, 55, 213, 8, 103, 12, 221, 212, 13, 160, 116, 91, - 237, 127, 218, 190, 103, 131, 77, 82, 36, 100, 22, 252, 79, 69, 54, 26, - 65, 182, 115, 142, 247, 20, 89, 81, 188, 244, 27, 120, 240, 248, 13, 230, - 67, 133, 32, 201, 129, 
87, 9, 245, 66, 88, 166, 34, 46, 184, 119, 218, - 144, 235, 163, 40, 138, 134, 127, 217, 64, 227, 116, 67, 55, 202, 130, 48, - 199, 42, 251, 112, 124, 153, 123, 194, 243, 49, 250, 12, 78, 157, 167, 134, - 210, 73, 156, 102, 21, 88, 216, 123, 45, 11, 208, 18, 47, 187, 20, 43, - 3, 180, 124, 2, 136, 176, 77, 111, 138, 139, 91, 225, 126, 8, 74, 255, - 88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168, 30, 177, - 121, 98, 131, 124, 69, 171, 75, 49, 184, 34, 76, 122, 202, 115, 184, 253, - 120, 182, 33, 251, 1, 74, 216, 217, 243, 168, 70, 162, 119, 158, 197, 198, - 61, 89, 7, 5, 54, 199, 211, 170, 23, 226, 44, 247, 165, 195, 7, 225, - 91, 23, 50, 15, 51, 208, 106, 94, 12, 31, 43, 112, 146, 139, 246, 182, - 113, 1, 97, 15, 66, 2, 51, 76, 164, 184, 237, 200, 218, 176, 72, 98, - 33, 135, 38, 147, 140, 229, 50, 94, 81, 187, 129, 17, 238, 168, 146, 203, - 181, 99, 164, 3, 104, 98, 255, 189, 114, 142, 86, 102, 229, 102, 80, 129, - 64, 84, 79, 161, 81, 156, 128, 111, 164, 197, 18, 15, 55, 196, 198, 191, - 28, 113, 117, 96, 207, 253, 19, 158, 231, 13, 53, 130, 252, 211, 58, 180, - 212, 142, 7, 219, 38, 81, 62, 109, 167, 113, 33, 56, 97, 185, 157, 130, - 186, 129, 119, 182, 196, 26, 54, 110, 65, 170, 166, 236, 30, 22, 162, 0, - 106, 12, 248, 33, 48, 72, 159, 17, 76, 244, 172, 132, 89, 171, 196, 76, - 254, 166, 76, 218, 226, 3, 52, 220, 238, 181, 179, 144, 225, 23, 3, 166, - 158, 35, 228, 154, 204, 23, 203, 71, 134, 189, 18, 168, 236, 141, 117, 138, - 2, 132, 78, 57, 154, 21, 250, 196, 184, 40, 161, 40, 10, 178, 134, 120, - 132, 123, 101, 82, 205, 121, 55, 140, 231, 56, 231, 71, 206, 246, 198, 150, - 146, 192, 45, 105, 242, 1, 125, 18, 176, 46, 222, 122, 19, 80, 113, 133, - 131, 162, 81, 51, 98, 168, 247, 161, 139, 39, 63, 162, 22, 153, 170, 92, - 91, 130, 174, 200, 45, 112, 99, 164, 132, 184, 191, 186, 200, 167, 86, 145, - 167, 227, 130, 44, 12, 158, 172, 249, 204, 17, 54, 249, 16, 200, 21, 174, - 67, 223, 105, 201, 50, 36, 133, 203, 244, 131, 228, 67, 29, 195, 
91, 91, - 55, 107, 167, 154, 170, 137, 218, 183, 169, 61, 99, 175, 128, 23, 142, 183, - 66, 255, 59, 187, 66, 85, 212, 109, 168, 82, 16, 43, 67, 139, 114, 176, - 216, 255, 130, 94, 152, 79, 183, 64, 100, 23, 214, 82, 34, 230, 48, 15, - 242, 130, 50, 241, 81, 32, 5, 125, 183, 182, 184, 99, 248, 109, 159, 210, - 226, 61, 119, 129, 39, 149, 78, 214, 107, 78, 147, 124, 228, 18, 143, 188, - 84, 180, 233, 119, 64, 39, 158, 133, 177, 168, 6, 150, 80, 117, 150, 56, - 49, 72, 49, 37, 30, 242, 49, 142, 33, 156, 34, 44, 44, 72, 58, 22, - 249, 46, 168, 80, 25, 196, 64, 174, 97, 179, 244, 134, 213, 105, 63, 151, - 21, 90, 168, 90, 245, 28, 157, 65, 250, 232, 188, 27, 99, 160, 156, 127, - 68, 193, 10, 80, 205, 36, 138, 229, 12, 223, 70, 169, 251, 41, 48, 94, - 41, 177, 99, 256, 158, 0, 6, 83, 231, 191, 120, 135, 157, 146, 218, 213, - 160, 7, 47, 234, 98, 211, 79, 225, 179, 95, 175, 105, 185, 79, 115, 0, - 104, 14, 65, 124, 15, 188, 52, 9, 253, 27, 132, 137, 13, 127, 75, 238, - 185, 253, 33, 8, 52, 157, 164, 68, 232, 188, 69, 28, 209, 233, 5, 129, - 216, 90, 252, 212, 33, 200, 222, 9, 112, 15, 43, 36, 226, 114, 15, 249, - 217, 8, 148, 22, 147, 23, 143, 67, 222, 116, 235, 250, 212, 210, 39, 142, - 108, 64, 209, 83, 73, 66, 99, 34, 17, 29, 45, 151, 244, 114, 28, 241, - 144, 208, 146, 179, 132, 89, 217, 198, 252, 219, 205, 165, 75, 107, 11, 173, - 76, 6, 196, 247, 152, 216, 248, 91, 209, 178, 57, 250, 174, 60, 79, 123, - 18, 135, 9, 241, 230, 159, 184, 68, 156, 251, 215, 9, 113, 234, 75, 235, - 103, 194, 205, 129, 230, 45, 96, 73, 157, 20, 200, 212, 212, 228, 161, 7, - 231, 228, 108, 43, 198, 87, 140, 140, 4, 182, 164, 3, 53, 104, 250, 213, - 85, 38, 89, 61, 52, 187, 35, 204, 86, 249, 100, 71, 248, 213, 163, 215, - 66, 106, 252, 129, 40, 111, 47, 24, 186, 221, 85, 205, 199, 237, 122, 181, - 32, 46, 182, 135, 33, 251, 142, 34, 208, 242, 128, 255, 4, 234, 15, 33, - 167, 222, 32, 186, 191, 34, 255, 244, 98, 240, 228, 204, 30, 142, 32, 70, - 69, 83, 110, 151, 10, 243, 141, 21, 223, 69, 
61, 37, 59, 209, 102, 114, - 223, 33, 129, 254, 255, 103, 86, 247, 235, 72, 126, 177, 102, 226, 102, 30, - 149, 221, 62, 247, 251, 120, 163, 173, 57, 202, 204, 24, 39, 106, 120, 143, - 202, 176, 191, 147, 37, 38, 51, 133, 47, 245, 157, 132, 154, 71, 183, 111, - 30, 180, 18, 202, 82, 96, 170, 91, 157, 181, 212, 140, 256, 8, 196, 121, - 149, 79, 66, 127, 113, 78, 4, 197, 84, 256, 111, 222, 102, 63, 228, 104, - 136, 223, 67, 193, 93, 154, 249, 83, 204, 101, 200, 234, 84, 252, 230, 195, - 43, 140, 120, 242, 89, 63, 166, 233, 209, 94, 43, 170, 126, 5, 205, 78, - 112, 80, 143, 151, 146, 248, 137, 203, 45, 183, 61, 1, 155, 8, 102, 59, - 68, 212, 230, 61, 254, 191, 128, 223, 176, 123, 229, 27, 146, 120, 96, 165, - 213, 12, 232, 40, 186, 225, 66, 105, 200, 195, 212, 110, 237, 238, 151, 19, - 12, 171, 150, 82, 7, 228, 79, 52, 15, 78, 62, 43, 21, 154, 114, 21, - 12, 212, 256, 232, 125, 127, 5, 51, 37, 252, 136, 13, 47, 195, 168, 191, - 231, 55, 57, 251, 214, 116, 15, 86, 210, 41, 249, 242, 119, 27, 250, 203, - 107, 69, 90, 43, 206, 154, 127, 54, 100, 78, 187, 54, 244, 177, 234, 167, - 202, 136, 209, 171, 69, 114, 133, 173, 26, 139, 78, 141, 128, 32, 124, 39, - 45, 218, 96, 68, 90, 44, 67, 62, 83, 190, 188, 256, 103, 42, 102, 64, - 249, 0, 141, 11, 61, 69, 70, 66, 233, 237, 29, 200, 251, 157, 71, 51, - 64, 133, 113, 76, 35, 125, 76, 137, 217, 145, 35, 69, 226, 180, 56, 249, - 156, 163, 176, 237, 81, 54, 85, 169, 115, 211, 129, 70, 248, 40, 252, 192, - 194, 101, 247, 8, 181, 124, 217, 191, 194, 93, 99, 127, 117, 177, 144, 151, - 228, 121, 32, 11, 89, 81, 26, 29, 183, 76, 249, 132, 179, 70, 34, 102, - 20, 66, 87, 63, 124, 205, 174, 177, 87, 219, 73, 218, 91, 87, 176, 72, - 15, 211, 47, 61, 251, 165, 39, 247, 146, 70, 150, 57, 1, 212, 36, 162, - 39, 38, 16, 216, 3, 50, 116, 200, 32, 234, 77, 181, 155, 19, 90, 188, - 36, 6, 254, 46, 46, 203, 25, 230, 181, 196, 4, 151, 225, 65, 122, 216, - 168, 86, 158, 131, 136, 16, 49, 102, 233, 64, 154, 88, 228, 52, 146, 69, - 93, 157, 243, 121, 
70, 209, 126, 213, 88, 145, 236, 65, 70, 96, 204, 47, - 10, 200, 77, 8, 103, 150, 48, 153, 5, 37, 52, 235, 209, 31, 181, 126, - 83, 142, 224, 140, 6, 32, 200, 171, 160, 179, 115, 229, 75, 194, 208, 39, - 59, 223, 52, 247, 38, 197, 135, 1, 6, 189, 106, 114, 168, 5, 211, 222, - 44, 63, 90, 160, 116, 172, 170, 133, 125, 138, 39, 131, 23, 178, 10, 214, - 36, 93, 28, 59, 68, 17, 123, 25, 255, 184, 204, 102, 194, 214, 129, 94, - 159, 245, 112, 141, 62, 11, 61, 197, 124, 221, 205, 11, 79, 71, 201, 54, - 58, 150, 29, 121, 87, 46, 240, 201, 68, 20, 194, 209, 47, 152, 158, 174, - 193, 164, 120, 255, 216, 165, 247, 58, 85, 130, 220, 23, 122, 223, 188, 98, - 21, 70, 72, 170, 150, 237, 76, 143, 112, 238, 206, 146, 215, 110, 4, 250, - 68, 44, 174, 177, 30, 98, 143, 241, 180, 127, 113, 48, 0, 1, 179, 199, - 59, 106, 201, 114, 29, 86, 173, 133, 217, 44, 200, 141, 107, 172, 16, 60, - 82, 58, 239, 94, 141, 234, 186, 235, 109, 173, 249, 139, 141, 59, 100, 248, - 84, 144, 49, 160, 51, 207, 164, 103, 74, 97, 146, 202, 193, 125, 168, 134, - 236, 111, 135, 121, 59, 145, 168, 200, 181, 173, 109, 2, 255, 6, 9, 245, - 90, 202, 214, 143, 121, 65, 85, 232, 132, 77, 228, 84, 26, 54, 184, 15, - 161, 29, 177, 79, 43, 0, 156, 184, 163, 165, 62, 90, 179, 93, 45, 239, - 1, 16, 120, 189, 127, 47, 74, 166, 20, 214, 233, 226, 89, 217, 229, 26, - 156, 53, 162, 60, 21, 3, 192, 72, 111, 51, 53, 101, 181, 208, 88, 82, - 179, 160, 219, 113, 240, 108, 43, 224, 162, 147, 62, 14, 95, 81, 205, 4, - 160, 177, 225, 115, 29, 69, 235, 168, 148, 29, 128, 114, 124, 129, 172, 165, - 215, 231, 214, 86, 160, 44, 157, 91, 248, 183, 73, 164, 56, 181, 162, 92, - 141, 118, 127, 240, 196, 77, 0, 9, 244, 79, 250, 100, 195, 25, 255, 85, - 94, 35, 212, 137, 107, 34, 110, 20, 200, 104, 17, 32, 231, 43, 150, 159, - 231, 216, 223, 190, 226, 109, 162, 197, 87, 92, 224, 11, 111, 73, 60, 225, - 238, 73, 246, 169, 19, 217, 119, 38, 121, 118, 70, 82, 99, 241, 110, 67, - 31, 76, 146, 215, 124, 240, 31, 103, 139, 224, 75, 160, 31, 78, 
93, 4, - 64, 9, 103, 223, 6, 227, 119, 85, 116, 81, 21, 43, 46, 206, 234, 132, - 85, 99, 22, 131, 135, 97, 86, 13, 234, 188, 21, 14, 89, 169, 207, 238, - 219, 177, 190, 72, 157, 41, 114, 140, 92, 141, 186, 1, 63, 107, 225, 184, - 118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161, 88, 206, 125, 164, - 15, 211, 173, 49, 146, 241, 71, 36, 58, 201, 46, 27, 33, 187, 91, 162, - 117, 19, 210, 213, 187, 97, 193, 50, 190, 114, 217, 60, 61, 167, 207, 213, - 213, 53, 135, 34, 156, 91, 115, 119, 46, 99, 242, 1, 90, 52, 198, 227, - 201, 91, 216, 146, 210, 82, 121, 38, 73, 133, 182, 193, 132, 148, 246, 75, - 109, 157, 179, 113, 176, 134, 205, 159, 148, 58, 103, 171, 132, 156, 133, 147, - 161, 231, 39, 100, 175, 97, 125, 28, 183, 129, 135, 191, 202, 181, 29, 218, - 43, 104, 148, 203, 189, 204, 4, 182, 169, 1, 134, 122, 141, 202, 13, 187, - 177, 112, 162, 35, 231, 6, 8, 241, 99, 6, 191, 45, 113, 113, 101, 104}; - -// The S-Box we use for further linearity breaking. -// We created it by taking the digits of decimal expansion of e. -// The code that created it can be found in 'ProduceRandomSBox.c'. 
-unsigned char SBox[256] = { -//0 1 2 3 4 5 6 7 8 9 A B C D E F -0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c, // 0 -0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a, // 1 -0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc, // 2 -0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1, // 3 -0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3, // 4 -0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b, // 5 -0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd, // 6 -0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6, // 7 -0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf, // 8 -0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34, // 9 -0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56, // A -0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49, // B -0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8, // C -0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d, // D -0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae, // E -0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F - -/////////////////////////////////////////////////////////////////////////////////////////////// -// -// Helper functions definition portion. 
-// -/////////////////////////////////////////////////////////////////////////////////////////////// - -// Don't vectorize, move decl to header file - -// Translates an input array with values in base 257 to output array with values in base 256. -// Returns the carry bit. -// -// Parameters: -// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257. -// The MSB is assumed to be the last one in the array. -// - output: the input array encoded in base 256. -// -// Returns: -// - The carry bit (MSB). -swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]); - -// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2). -// -// Parameters: -// - x: the input integer. -// -// Returns: -// - The result, which equals (x MOD FIELD_SIZE), such that |result| <= (FIELD_SIZE / 2). -int Center(int x); - -// Calculates bit reversal permutation. -// -// Parameters: -// - input: the input to reverse. -// - numOfBits: the number of bits in the input to reverse. -// -// Returns: -// - The resulting number, which is obtained from the input by reversing its bits. -int ReverseBits(int input, int numOfBits); - -// Initializes the FFT fast lookup table. -// Shall be called only once. -void InitializeSWIFFTX(); - -// Calculates the FFT. -// -// Parameters: -// - input: the input to the FFT. -// - output: the resulting output. -void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output); - -/////////////////////////////////////////////////////////////////////////////////////////////// -// Helper functions implementation portion. -/////////////////////////////////////////////////////////////////////////////////////////////// - -// Don't vectorize, delete this copy. 
- -swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]) -{ - swift_int32_t pairs[EIGHTH_N / 2]; - int i; - - for (i = 0; i < EIGHTH_N; i += 2) - { - // input[i] + 257 * input[i + 1] - pairs[i >> 1] = input[i] + input[i + 1] + (input[i + 1] << 8); - } - - for (i = (EIGHTH_N / 2) - 1; i > 0; --i) - { - int j; - - for (j = i - 1; j < (EIGHTH_N / 2) - 1; ++j) - { - // pairs[j + 1] * 513, because 257^2 = 513 % 256^2. - register swift_int32_t temp = pairs[j] + pairs[j + 1] + (pairs[j + 1] << 9); - pairs[j] = temp & 0xffff; - pairs[j + 1] += (temp >> 16); - } - } - - for (i = 0; i < EIGHTH_N; i += 2) - { - output[i] = (unsigned char) (pairs[i >> 1] & 0xff); - output[i + 1] = (unsigned char) ((pairs[i >> 1] >> 8) & 0xff); - } - - return (pairs[EIGHTH_N/2 - 1] >> 16); -} - -int Center(int x) -{ - int result = x % FIELD_SIZE; - - if (result > (FIELD_SIZE / 2)) - result -= FIELD_SIZE; - - if (result < (FIELD_SIZE / -2)) - result += FIELD_SIZE; - - return result; -} - -int ReverseBits(int input, int numOfBits) -{ - register int reversed = 0; - - for (input |= numOfBits; input > 1; input >>= 1) - reversed = (reversed << 1) | (input & 1); - - return reversed; -} - -void InitializeSWIFFTX() -{ - int i, j, k, x; - // The powers of OMEGA - int omegaPowers[2 * N]; - omegaPowers[0] = 1; - - if (wasSetupDone) - return; - - for (i = 1; i < (2 * N); ++i) - { - omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA); - } - - for (i = 0; i < (N / W); ++i) - { - for (j = 0; j < W; ++j) - { - multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)]; - } - } - - for (x = 0; x < 256; ++x) - { - for (j = 0; j < 8; ++j) - { - register int temp = 0; - for (k = 0; k < 8; ++k) - { - temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)] - * ((x >> k) & 1); - } - - fftTable[(x << 3) + j] = Center(temp); - } - } - - wasSetupDone = true; -} - -// input should be deinterleaved in contiguos memory -// output and F are 4x32 
-// multipliers & fftTable are scalar 16 - - -void FFT_4way(const unsigned char input[EIGHTH_N], swift_int32_t *output) -{ - swift_int16_t *mult = multipliers; - m128_swift_int32_t F[64]; - - for (int i = 0; i < 8; i++) - { - int j = i<<3; - -// Need to isolate bytes in input, 8 bytes per lane. -// Each iteration of the loop process one input vector -// Each lane reads a different index to ffttable. - -// deinterleave the input! - -// load table with 4 lanes from different indexes into fftTable -// extract bytes into m128 4x16 -// mutiply by vectorized mult - -// input[lane][byte] - - __m128i table; - table = _mm_set_epi32( fftTable[ input[3][i] ], - fftTable[ input[2][i] ], - fftTable[ input[1][i] ], - fftTable[ input[0][i] ] ); - - F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table ); - - table = _mm_set_epi32( fftTable[ input[3][i+1] ] - fftTable[ input[2][i+1] ] - fftTable[ input[1][i+1] ] - fftTable[ input[0][i+1] ] ); - - F[i+8] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table ); - - - m128_swift_int16_t *table = &( fftTable[input[i] << 3] ); - - F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), - mm128_const1_32( table[0] ) ); - F[i+ 8] = _mm_mullo_epi32( mm128_const1_32( mult[j+1] ), - mm128_const1_32( table[1] ) ); - F[i+16] = _mm_mullo_epi32( mm128_const1_32( mult[j+2] ), - mm128_const1_32( table[2] ) ); - F[i+24] = _mm_mullo_epi32( mm128_const1_32( mult[j+3] ), - mm128_const1_32( table[3] ) ); - F[i+32] = _mm_mullo_epi32( mm128_const1_32( mult[j+4] ), - mm128_const1_32( table[4] ) ); - F[i+40] = _mm_mullo_epi32( mm128_const1_32( mult[j+5] ), - mm128_const1_32( table[5] ) ); - F[i+48] = _mm_mullo_epi32( mm128_const1_32( mult[j+6] ), - mm128_const1_32( table[6] ) ); - F[i+56] = _mm_mullo_epi32( mm128_const1_32( mult[j+7] ), - mm128_const1_32( table[7] ) ); - } - - - for ( int i = 0; i < 8; i++ ) - { - int j = i<<3; - ADD_SUB_4WAY( F[j ], F[j+1] ); - ADD_SUB_4WAY( F[j+2], F[j+3] ); - ADD_SUB_4WAY( F[j+4], F[j+5] ); - ADD_SUB_4WAY( 
F[j+6], F[j+7] ); - - F[j+3] = _mm_slli_epi32( F[j+3], 4 ); - F[j+7] = _mm_slli_epi32( F[j+7], 4 ); - - ADD_SUB_4WAY( F[j ], F[j+2] ); - ADD_SUB_4WAY( F[j+1], F[j+3] ); - ADD_SUB_4WAY( F[j+4], F[j+6] ); - ADD_SUB_4WAY( F[j+5], F[j+7] ); - - F[j+5] = _mm_slli_epi32( F[j+5], 2 ); - F[j+6] = _mm_slli_epi32( F[j+6], 4 ); - F[j+7] = _mm_slli_epi32( F[j+7], 6 ); - - ADD_SUB_4WAY( F[j ], F[j+4] ); - ADD_SUB_4WAY( F[j+1], F[j+5] ); - ADD_SUB_4WAY( F[j+2], F[j+6] ); - ADD_SUB_4WAY( F[j+3], F[j+7] ); - - output[i ] = Q_REDUCE_4WAY( F[j ] ); - output[i+ 8] = Q_REDUCE_4WAY( F[j+1] ); - output[i+16] = Q_REDUCE_4WAY( F[j+2] ); - output[i+24] = Q_REDUCE_4WAY( F[j+3] ); - output[i+32] = Q_REDUCE_4WAY( F[j+4] ); - output[i+40] = Q_REDUCE_4WAY( F[j+5] ); - output[i+48] = Q_REDUCE_4WAY( F[j+6] ); - output[i+56] = Q_REDUCE_4WAY( F[j+7] ); - } -} - -// Calculates the FFT part of SWIFFT. -// We divided the SWIFFT calculation into two, because that way we could save 2 computations of -// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs -// is only the A's part. -// -// Parameters: -// - input: the input to FFT. -// - m: the input size divided by 8. The function performs m FFTs. -// - output: will store the result. -void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output) -{ - int i; - - for (i = 0; - i < m; - i++, input += EIGHTH_N, output += N) - { - FFT(input, output); - } -} - -// Calculates the 'sum' part of SWIFFT, including the base change at the end. -// We divided the SWIFFT calculation into two, because that way we could save 2 computations of -// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs -// is only the A's part. -// -// Parameters: -// - input: the input. Of size 64 * m. -// - m: the input size divided by 64. -// - output: will store the result. -// - a: the coefficients in the sum. Of size 64 * m. 
-void SWIFFTSum(const swift_int32_t *input, int m, unsigned char *output, const swift_int16_t *a) -{ - int i, j; - swift_int32_t result[N]; - register swift_int16_t carry = 0; - - for (j = 0; j < N; ++j) - { - register swift_int32_t sum = 0; - const register swift_int32_t *f = input + j; - const register swift_int16_t *k = a + j; - - for (i = 0; i < m; i++, f += N,k += N) - { - sum += (*f) * (*k); - } - - result[j] = sum; - } - - for (j = 0; j < N; ++j) - { - result[j] = ((FIELD_SIZE << 22) + result[j]) % FIELD_SIZE; - } - - for (j = 0; j < 8; ++j) - { - int register carryBit = TranslateToBase256(result + (j << 3), output + (j << 3)); - carry |= carryBit << j; - } - - output[N] = carry; -} - - -// On entry input is interleaved 4x64. SIZE is *4 lanes / 8 bytes, -// multiply by 2. - - -void ComputeSingleSWIFFTX_4way( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], - unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], - bool doSmooth) -{ - int i; - // Will store the result of the FFT parts: - m128_swift_int32_t fftOut[N * M]; -// swift_int32_t fftOut[N * M]; - unsigned char intermediate[N * 3 + 8]; - unsigned char carry0,carry1,carry2; - - // Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets - // overriden by the following SWIFFT): - - // 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs: - SWIFFTFFT(input, M, fftOut); - - // 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients: - - // 2a. The first SWIFFT: - SWIFFTSum(fftOut, M, intermediate, As); - // Remember the carry byte: - carry0 = intermediate[N]; - - // 2b. The second one: - SWIFFTSum(fftOut, M, intermediate + N, As + (M * N)); - carry1 = intermediate[2 * N]; - - // 2c. The third one: - SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N)); - carry2 = intermediate[3 * N]; - - //2d. 
Put three carry bytes in their place - intermediate[3 * N] = carry0; - intermediate[(3 * N) + 1] = carry1; - intermediate[(3 * N) + 2] = carry2; - - // Padding intermediate output with 5 zeroes. - memset(intermediate + (3 * N) + 3, 0, 5); - - // Apply the S-Box: - for (i = 0; i < (3 * N) + 8; ++i) - { - intermediate[i] = SBox[intermediate[i]]; - } - - // 3. The final and last SWIFFT: - SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut); - SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As); - - if (doSmooth) - { - unsigned char sum[N]; - register int i, j; - memset(sum, 0, N); - - for (i = 0; i < (N + 1) * 8; ++i) - { - register const swift_int16_t *AsRow; - register int AShift; - - if (!(output[i >> 3] & (1 << (i & 7)))) - { - continue; - } - - AsRow = As + N * M + (i & ~(N - 1)) ; - AShift = i & 63; - - for (j = AShift; j < N; ++j) - { - sum[j] += AsRow[j - AShift]; - } - - for(j = 0; j < AShift; ++j) - { - sum[j] -= AsRow[N - AShift + j]; - } - } - - for (i = 0; i < N; ++i) - { - output[i] = sum[i]; - } - - output[N] = 0; - } -} diff --git a/algo/swifftx/swifftx.c b/algo/swifftx/swifftx.c index d3ecd15c..c7d8c727 100644 --- a/algo/swifftx/swifftx.c +++ b/algo/swifftx/swifftx.c @@ -604,21 +604,14 @@ void InitializeSWIFFTX() int omegaPowers[2 * N]; omegaPowers[0] = 1; - if (wasSetupDone) - return; + if (wasSetupDone) return; for (i = 1; i < (2 * N); ++i) - { omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA); - } for (i = 0; i < (N / W); ++i) - { for (j = 0; j < W; ++j) - { multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)]; - } - } for (x = 0; x < 256; ++x) { @@ -626,10 +619,8 @@ void InitializeSWIFFTX() { register int temp = 0; for (k = 0; k < 8; ++k) - { temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)] * ((x >> k) & 1); - } fftTable[(x << 3) + j] = Center(temp); } @@ -703,18 +694,18 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) #if defined (__AVX512VL__) && defined(__AVX512BW__) - #define 
Q_REDUCE( a ) \ - _mm256_sub_epi32( _mm256_and_si256( a, \ - _mm256_movm_epi8( 0x11111111 ) ), _mm256_srai_epi32( a, 8 ) ) + const __m256i mask = _mm256_movm_epi8( 0x11111111 ); + +#else + + const __m256i mask = m256_const1_32( 0x000000ff ); -#else +#endif #define Q_REDUCE( a ) \ - _mm256_sub_epi32( _mm256_and_si256( a, \ - m256_const1_32( 0x000000ff ) ), _mm256_srai_epi32( a, 8 ) ) + _mm256_sub_epi32( _mm256_and_si256( a, mask ), \ + _mm256_srai_epi32( a, 8 ) ) -#endif - out[0] = Q_REDUCE( F[0] ); out[1] = Q_REDUCE( F[1] ); out[2] = Q_REDUCE( F[2] ); @@ -805,9 +796,10 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) #undef ADD_SUB + const __m128i mask = m128_const1_32( 0x000000ff ); + #define Q_REDUCE( a ) \ - _mm_sub_epi32( _mm_and_si128( a, \ - m128_const1_32( 0x000000ff ) ), _mm_srai_epi32( a, 8 ) ) + _mm_sub_epi32( _mm_and_si128( a, mask ), _mm_srai_epi32( a, 8 ) ) out[ 0] = Q_REDUCE( F[ 0] ); out[ 1] = Q_REDUCE( F[ 1] ); @@ -1357,6 +1349,7 @@ void SWIFFTSum( const swift_int32_t *input, int m, unsigned char *output, output[N] = carry; } +/* void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth) @@ -1434,51 +1427,50 @@ void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], output[N] = 0; } } +*/ -void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], - unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] ) +void ComputeSingleSWIFFTX( unsigned char *input, unsigned char *output ) { int i; // Will store the result of the FFT parts: swift_int32_t fftOut[N * M] __attribute__ ((aligned (64))); - unsigned char intermediate[N * 3 + 8] __attribute__ ((aligned (64))); + unsigned char sum[ N*3 + 8 ] __attribute__ ((aligned (64))); unsigned char carry0,carry1,carry2; // Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets // overriden by the following SWIFFT): // 1. 
Compute the FFT of the input - the common part for the first 3 SWIFFTs: - SWIFFTFFT(input, M, fftOut); + SWIFFTFFT( input, M, fftOut ); // 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients: // 2a. The first SWIFFT: - SWIFFTSum(fftOut, M, intermediate, As); - // Remember the carry byte: - carry0 = intermediate[N]; + SWIFFTSum( fftOut, M, sum, As ); + carry0 = sum[N]; // 2b. The second one: - SWIFFTSum(fftOut, M, intermediate + N, As + (M * N)); - carry1 = intermediate[2 * N]; + SWIFFTSum( fftOut, M, sum + N, As + M*N ); + carry1 = sum[ 2*N ]; // 2c. The third one: - SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N)); - carry2 = intermediate[3 * N]; + SWIFFTSum( fftOut, M, sum + 2*N, As + 2*M*N ); + carry2 = sum[ 3*N ]; //2d. Put three carry bytes in their place - intermediate[3 * N] = carry0; - intermediate[(3 * N) + 1] = carry1; - intermediate[(3 * N) + 2] = carry2; + sum[ 3*N ] = carry0; + sum[ 3*N + 1 ] = carry1; + sum[ 3*N + 2 ] = carry2; // Padding intermediate output with 5 zeroes. - memset(intermediate + (3 * N) + 3, 0, 5); + memset( sum + 3*N + 3, 0, 5 ); // Apply the S-Box: for ( i = 0; i < (3 * N) + 8; ++i ) - intermediate[i] = SBox[intermediate[i]]; + sum[i] = SBox[ sum[i] ]; // 3. The final and last SWIFFT: - SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut); - SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As); - + SWIFFTFFT( sum, 3 * (N/8) + 1, fftOut ); + SWIFFTSum( fftOut, 3 * (N/8) + 1, sum, As ); + memcpy( output, sum, SWIFFTX_OUTPUT_BLOCK_SIZE - 1 ); } diff --git a/algo/swifftx/swifftx.h b/algo/swifftx/swifftx.h index eedbc8f0..ad2214a6 100644 --- a/algo/swifftx/swifftx.h +++ b/algo/swifftx/swifftx.h @@ -61,11 +61,10 @@ void ComputeSingleSWIFFT(unsigned char *input, unsigned short m, // // Returns: // - Success value. 
-void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], - unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] ); +void ComputeSingleSWIFFTX( unsigned char *input, unsigned char *output ); -void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], - unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth); +//void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE], +// unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth); // Calculates the powers of OMEGA and generates the bit reversal permutation. // You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere. diff --git a/algo/x16/x16r-gate.c b/algo/x16/x16r-gate.c index 3a94344b..88401062 100644 --- a/algo/x16/x16r-gate.c +++ b/algo/x16/x16r-gate.c @@ -62,8 +62,7 @@ bool register_x16r_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x16r; gate->hash = (void*)&x16r_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; x16_r_s_getAlgoString = (void*)&x16r_getAlgoString; opt_target_factor = 256.0; return true; @@ -81,8 +80,7 @@ bool register_x16rv2_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x16rv2; gate->hash = (void*)&x16rv2_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; x16_r_s_getAlgoString = (void*)&x16r_getAlgoString; opt_target_factor = 256.0; return true; @@ -100,8 +98,7 @@ bool register_x16s_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x16r; gate->hash = (void*)&x16r_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; x16_r_s_getAlgoString = 
(void*)&x16s_getAlgoString; opt_target_factor = 256.0; return true; @@ -234,8 +231,7 @@ bool register_x16rt_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x16rt; gate->hash = (void*)&x16r_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; opt_target_factor = 256.0; return true; }; @@ -252,8 +248,7 @@ bool register_x16rt_veil_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x16rt; gate->hash = (void*)&x16r_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; gate->build_extraheader = (void*)&veil_build_extraheader; opt_target_factor = 256.0; return true; @@ -292,8 +287,7 @@ bool register_x21s_algo( algo_gate_t* gate ) gate->hash = (void*)&x21s_hash; gate->miner_thread_init = (void*)&x21s_thread_init; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | - VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; x16_r_s_getAlgoString = (void*)&x16s_getAlgoString; opt_target_factor = 256.0; return true; diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c index d192b0df..926beb4c 100644 --- a/algo/x17/sonoa-gate.c +++ b/algo/x17/sonoa-gate.c @@ -12,7 +12,7 @@ bool register_sonoa_algo( algo_gate_t* gate ) init_sonoa_ctx(); gate->hash = (void*)&sonoa_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; return true; }; diff --git a/algo/x17/x17-gate.c b/algo/x17/x17-gate.c index 6ab09ff0..eee3d60d 100644 --- a/algo/x17/x17-gate.c +++ b/algo/x17/x17-gate.c @@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate ) #else gate->hash = (void*)&x17_hash; #endif - gate->optimizations 
= SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; return true; }; diff --git a/algo/x17/xevan-gate.c b/algo/x17/xevan-gate.c index 545a0aa6..184ed2df 100644 --- a/algo/x17/xevan-gate.c +++ b/algo/x17/xevan-gate.c @@ -12,7 +12,7 @@ bool register_xevan_algo( algo_gate_t* gate ) init_xevan_ctx(); gate->hash = (void*)&xevan_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; opt_target_factor = 256.0; return true; }; diff --git a/algo/x22/x22i-gate.c b/algo/x22/x22i-gate.c index ff0cc805..826f0f88 100644 --- a/algo/x22/x22i-gate.c +++ b/algo/x22/x22i-gate.c @@ -31,8 +31,8 @@ bool register_x22i_algo( algo_gate_t* gate ) #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT - | AVX512_OPT | VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | SSE42_OPT | AES_OPT | AVX2_OPT | SHA_OPT + | AVX512_OPT | VAES_OPT; return true; }; @@ -48,8 +48,8 @@ bool register_x25x_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_x25x; gate->hash = (void*)&x25x_hash; #endif - gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | - AVX512_OPT | VAES_OPT | VAES256_OPT; + gate->optimizations = SSE2_OPT | SSE42_OPT | AES_OPT | AVX2_OPT | SHA_OPT | + AVX512_OPT | VAES_OPT; return true; }; diff --git a/build-allarch.sh b/build-allarch.sh index 5fa38f6c..4a80588e 100755 --- a/build-allarch.sh +++ b/build-allarch.sh @@ -4,128 +4,97 @@ # during develpment. However the information contained may provide compilation # tips to users. 
-rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null +rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null -# Icelake AVX512 SHA VAES +# AVX512 SHA VAES: Intel Core Icelake, Rocketlake make distclean || echo clean rm -f config.status ./autogen.sh || echo done CFLAGS="-O3 -march=icelake-client -Wall -fno-common" ./configure --with-curl +#CFLAGS="-O3 -march=rocketlake -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-avx512-sha-vaes.exe strip -s cpuminer mv cpuminer cpuminer-avx512-sha-vaes -# Rocketlake AVX512 SHA AES +# AVX512 AES: Intel Core HEDT Sylake-X, Cascadelake make clean || echo clean rm -f config.status -CFLAGS="-O3 -march=cascadelake -msha -Wall -fno-common" ./configure --with-curl -#CFLAGS="-O3 -march=skylake-avx512 -msha -Wall -fno-common" ./configure --with-curl -# CFLAGS="-O3 -march=rocketlake -Wall -fno-common" ./configure --with-curl +CFLAGS="-O3 -march=skylake-avx512 -maes -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-avx512-sha.exe strip -s cpuminer -mv cpuminer cpuminer-avx512-sha +mv cpuminer cpuminer-avx512 -# Slylake-X AVX512 AES -make clean || echo clean +# AVX2 SHA VAES: Intel Alderlake, AMD Zen3 +make clean || echo done rm -f config.status -CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-common" ./configure --with-curl +# vaes doesn't include aes +CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-avx512.exe strip -s cpuminer -mv cpuminer cpuminer-avx512 +mv cpuminer cpuminer-avx2-sha-vaes + +# AVX2 SHA AES: AMD Zen1 +make clean || echo done +rm -f config.status +CFLAGS="-O3 -march=znver1 
-maes -Wall -fno-common" ./configure --with-curl +#CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl +make -j 8 +strip -s cpuminer +mv cpuminer cpuminer-avx2-sha -# Haswell AVX2 AES +# AVX2 AES: Intel Haswell..Cometlake make clean || echo clean rm -f config.status # GCC 9 doesn't include AES with core-avx2 CFLAGS="-O3 -march=core-avx2 -maes -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-avx2.exe strip -s cpuminer mv cpuminer cpuminer-avx2 -# Sandybridge AVX AES +# AVX AES: Intel Sandybridge, Ivybridge make clean || echo clean rm -f config.status CFLAGS="-O3 -march=corei7-avx -maes -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-avx.exe strip -s cpuminer mv cpuminer cpuminer-avx -# Westmere SSE4.2 AES +# SSE4.2 AES: Intel Westmere make clean || echo clean rm -f config.status CFLAGS="-O3 -march=westmere -maes -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-aes-sse42.exe strip -s cpuminer mv cpuminer cpuminer-aes-sse42 -# Nehalem SSE4.2 +# SSE4.2: Intel Nehalem make clean || echo clean rm -f config.status CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-sse42.exe strip -s cpuminer mv cpuminer cpuminer-sse42 -# Core2 SSSE3 +# SSSE3: Intel Core2 make clean || echo clean rm -f config.status CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-ssse3.exe strip -s cpuminer mv cpuminer cpuminer-ssse3 -# Generic SSE2 +# SSE2 make clean || echo clean rm -f config.status CFLAGS="-O3 -msse2 -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-sse2.exe strip -s cpuminer mv cpuminer cpuminer-sse2 -# AMD Zen1 AVX2 SHA -make clean || echo done -rm -f config.status -CFLAGS="-O3 -march=znver1 -Wall 
-fno-common" ./configure --with-curl -make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-zen.exe -strip -s cpuminer -mv cpuminer cpuminer-zen - -# AMD Zen3 AVX2 SHA VAES -make clean || echo done -rm -f config.status -CFLAGS="-O3 -march=znver2 -mvaes -Wall -fno-common" ./configure --with-curl -# CFLAGS="-O3 -march=znver3 -Wall -fno-common" ./configure --with-curl -make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe cpuminer-zen3.exe -strip -s cpuminer -mv cpuminer cpuminer-zen3 - -# Native to current CPU +# Native to host CPU make clean || echo done rm -f config.status CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl make -j 8 -strip -s cpuminer.exe strip -s cpuminer diff --git a/clean-all.sh b/clean-all.sh index e91bbb5b..87183d5e 100755 --- a/clean-all.sh +++ b/clean-all.sh @@ -2,8 +2,8 @@ # # make clean and rm all the targetted executables. -rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 cpuminer-zen3 > /dev/null +rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes > /dev/null -rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-zen3.exe > /dev/null +rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe > /dev/null make distclean > /dev/null diff --git a/configure b/configure index b93191f8..eca6ff1f 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. 
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.3. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.1. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.18.3' -PACKAGE_STRING='cpuminer-opt 3.18.3' +PACKAGE_VERSION='3.19.1' +PACKAGE_STRING='cpuminer-opt 3.19.1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.18.3 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.19.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.18.3:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.19.1:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.18.3 +cpuminer-opt configure 3.19.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.18.3, which was +It was created by cpuminer-opt $as_me 3.19.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.18.3' + VERSION='3.19.1' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. 
instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.18.3, which was +This file was extended by cpuminer-opt $as_me 3.19.1, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.18.3 +cpuminer-opt config.status 3.19.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 8b80c385..11d4e595 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.19.0]) +AC_INIT([cpuminer-opt], [3.19.1]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 179881c6..ee31ae58 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -1038,9 +1038,17 @@ void report_summary_log( bool force ) #endif - if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) ) - && ( et.tv_sec < 300 ) ) - return; + if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) ) ) + { + if ( et.tv_sec < 300 ) + return; + if ( ( s_get_ptr != s_put_ptr ) && ( et.tv_sec < 360 ) ) + return; + } + +// if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) ) +// && ( et.tv_sec < 300 ) ) +// return; // collect and reset periodic counters pthread_mutex_lock( &stats_lock ); @@ -1983,6 +1991,10 @@ void set_work_data_big_endian( struct work *work ) // calculate net diff from nbits. double std_calc_network_diff( struct work* work ) { + uint32_t nbits = work->data[ algo_gate.nbits_index ]; + uint32_t shift = nbits & 0xff; + uint32_t bits = bswap_32( nbits ) & 0x00ffffff; +/* // sample for diff 43.281 : 1c05ea29 // todo: endian reversed on longpoll could be zr5 specific... 
int nbits_index = algo_gate.nbits_index; @@ -1990,15 +2002,17 @@ double std_calc_network_diff( struct work* work ) : swab32( work->data[ nbits_index ] ); uint32_t bits = ( nbits & 0xffffff ); int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28 +*/ + int m; - double d = (double)0x0000ffff / (double)bits; + long double d = (long double)0x0000ffff / (long double)bits; for ( m = shift; m < 29; m++ ) d *= 256.0; for ( m = 29; m < shift; m++ ) d /= 256.0; if ( opt_debug_diff ) - applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits); - return d; + applog(LOG_DEBUG, "net diff: %8f -> shift %u, bits %08x", (double)d, shift, bits); + return (double)d; } void std_get_new_work( struct work* work, struct work* g_work, int thr_id, @@ -2137,7 +2151,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) uint64_t net_ttf = ( last_block_height - session_first_block ) == 0 ? 0 : et.tv_sec / ( last_block_height - session_first_block ); - if ( net_diff && net_ttf ) + if ( net_diff > 0. && net_ttf ) { double net_hr = nd / net_ttf; char net_hr_units[4] = {0}; @@ -2572,7 +2586,7 @@ static void *longpoll_thread(void *userdata) if (!opt_quiet) { char netinfo[64] = { 0 }; - if (net_diff > 0.) + if ( net_diff > 0. 
) { sprintf(netinfo, ", diff %.3f", net_diff); } @@ -2844,7 +2858,6 @@ static bool cpu_capability( bool display_only ) bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features ); bool algo_has_sha = set_incl( SHA_OPT, algo_features ); bool algo_has_vaes = set_incl( VAES_OPT, algo_features ); - bool algo_has_vaes256 = set_incl( VAES256_OPT, algo_features ); bool use_aes; bool use_sse2; bool use_sse42; @@ -2924,14 +2937,13 @@ static bool cpu_capability( bool display_only ) if ( algo_features == EMPTY_SET ) printf( " None" ); else { - if ( algo_has_avx512 ) printf( " AVX512" ); - else if ( algo_has_avx2 ) printf( " AVX2 " ); - else if ( algo_has_sse42 ) printf( " SSE4.2" ); - else if ( algo_has_sse2 ) printf( " SSE2 " ); - if ( algo_has_vaes || - algo_has_vaes256 ) printf( " VAES" ); - else if ( algo_has_aes ) printf( " AES" ); - if ( algo_has_sha ) printf( " SHA" ); + if ( algo_has_avx512 ) printf( " AVX512" ); + else if ( algo_has_avx2 ) printf( " AVX2 " ); + else if ( algo_has_sse42 ) printf( " SSE4.2" ); + else if ( algo_has_sse2 ) printf( " SSE2 " ); + if ( algo_has_vaes ) printf( " VAES" ); + else if ( algo_has_aes ) printf( " AES" ); + if ( algo_has_sha ) printf( " SHA" ); } } printf("\n"); @@ -2973,8 +2985,7 @@ static bool cpu_capability( bool display_only ) use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2; use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512; use_sha = cpu_has_sha && sw_has_sha && algo_has_sha; - use_vaes = cpu_has_vaes && sw_has_vaes && ( algo_has_vaes - || algo_has_vaes256 ); + use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes; use_none = !( use_sse2 || use_aes || use_avx512 || use_avx2 || use_sha || use_vaes ); diff --git a/winbuild-cross.sh b/winbuild-cross.sh index 71e42981..ec738593 100755 --- a/winbuild-cross.sh +++ b/winbuild-cross.sh @@ -16,9 +16,9 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib" export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32" # used by GCC export 
LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl" -# support for Windows CPU groups -export DEFAULT_CFLAGS="-O3 -Wall -D_WIN32_WINNT=0x0601" -#export DEFAULT_CFLAGS="-O3 -Wall" +# support for Windows CPU groups, AES sometimes not included in -march +export DEFAULT_CFLAGS="-O3 -maes -Wall -D_WIN32_WINNT=0x0601" +export DEFAULT_CFLAGS_OLD="-O3 -Wall" # make link to local gmp header file. ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h @@ -41,7 +41,7 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/ # Start building... -# Icelake AVX512 SHA VAES +# AVX512 SHA VAES: Intel Core Icelake, Rocketlake ./clean-all.sh || echo clean rm -f config.status ./autogen.sh || echo done @@ -50,65 +50,50 @@ make -j 8 strip -s cpuminer.exe mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe -# Rocketlake AVX512 SHA AES +# AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake make clean || echo clean rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -march=cascadelake -msha" ./configure $CONFIGURE_ARGS -#CFLAGS="$DEFAULT_CFLAGS -march=rocketlake" ./configure $CONFIGURE_ARGS -make -j 8 -strip -s cpuminer.exe -mv cpuminer.exe release/cpuminer-avx512-sha.exe - -# Zen1 AVX2 AES SHA -make clean || echo clean -rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -march=znver1" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS -march=skylake-avx512" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe -mv cpuminer.exe release/cpuminer-zen.exe +mv cpuminer.exe release/cpuminer-avx512.exe -# Zen3 AVX2 SHA VAES -make clean || echo clean +# AVX2 SHA VAES: Intel Alderlake, AMD Zen3 +make clean || echo done rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -march=znver2 -mvaes" ./configure $CONFIGURE_ARGS -# CFLAGS="$DEFAULT_CFLAGS -march=znver3" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS -mavx2 -msha -mvaes" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe -mv cpuminer.exe release/cpuminer-zen3.exe +mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe -# Slylake-X
AVX512 AES -# mingw won't compile avx512 without -fno-asynchronous-unwind-tables +# AVX2 AES SHA: AMD Zen1 make clean || echo clean rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -march=skylake-avx512" ./configure $CONFIGURE_ARGS -#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS -march=znver1" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe -mv cpuminer.exe release/cpuminer-avx512.exe +mv cpuminer.exe release/cpuminer-avx2-sha.exe -# Haswell AVX2 AES +# AVX2 AES: Intel Core Haswell, Skylake, Kabylake, Coffeelake, Cometlake make clean || echo clean rm -f config.status -# GCC 9 doesn't include AES in -march=core-avx2 -CFLAGS="$DEFAULT_CFLAGS -march=core-avx2 -maes" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS -march=core-avx2" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe mv cpuminer.exe release/cpuminer-avx2.exe -# Sandybridge AVX AES +# AVX AES: Intel Sandybridge, Ivybridge make clean || echo clean rm -f config.status -# -march=corei7-avx still includes aes, but just in case -CFLAGS="$DEFAULT_CFLAGS -march=corei7-avx -maes" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS_OLD -march=corei7-avx -maes" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe mv cpuminer.exe release/cpuminer-avx.exe -# Westmere SSE4.2 AES +# SSE4.2 AES: Intel Westmere make clean || echo clean rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -march=westmere -maes" ./configure $CONFIGURE_ARGS -#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS_OLD -march=westmere -maes" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe mv cpuminer.exe release/cpuminer-aes-sse42.exe @@ -116,7 +101,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe # Nehalem SSE4.2 #make clean || echo clean #rm -f config.status -#CFLAGS="$DEFAULT_CFLAGS -march=corei7" ./configure $CONFIGURE_ARGS +#CFLAGS="$DEFAULT_CFLAGS_OLD -march=corei7" ./configure 
$CONFIGURE_ARGS #make #strip -s cpuminer.exe #mv cpuminer.exe release/cpuminer-sse42.exe @@ -124,7 +109,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe # Core2 SSSE3 #make clean || echo clean #rm -f config.status -#CFLAGS="$DEFAULT_CFLAGS -march=core2" ./configure $CONFIGURE_ARGS +#CFLAGS="$DEFAULT_CFLAGS_OLD -march=core2" ./configure $CONFIGURE_ARGS #make #strip -s cpuminer.exe #mv cpuminer.exe release/cpuminer-ssse3.exe @@ -133,7 +118,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe # Generic SSE2 make clean || echo clean rm -f config.status -CFLAGS="$DEFAULT_CFLAGS -msse2" ./configure $CONFIGURE_ARGS +CFLAGS="$DEFAULT_CFLAGS_OLD -msse2" ./configure $CONFIGURE_ARGS make -j 8 strip -s cpuminer.exe mv cpuminer.exe release/cpuminer-sse2.exe