From 0a78013cbe13358ab9130934bf90d8817f539160 Mon Sep 17 00:00:00 2001 From: Jay D Dee Date: Sun, 12 Nov 2023 18:48:50 -0500 Subject: [PATCH] v23.9 --- RELEASE_NOTES | 5 +++++ algo/blake/blake512-hash.c | 6 ++++-- algo/x16/minotaur.c | 22 ++++++++++---------- configure | 20 +++++++++---------- configure.ac | 2 +- configure~ | 14 ------------- simd-utils/simd-128.h | 41 +++++++++++++++++++------------------- simd-utils/simd-neon.h | 12 +++++------ 8 files changed, 57 insertions(+), 65 deletions(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index a553a3f5..4f532716 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -73,6 +73,11 @@ If not what makes it happen or not happen? Change Log ---------- +v23.9 + +x86_64: fixed minotaurx crash, broken in 23.7. +ARM: #407 fix compile error due to incorrect type casting for vrev instruction argument. + v23.8 Cpuminer-opt is no longer dependant on OpenSSL. diff --git a/algo/blake/blake512-hash.c b/algo/blake/blake512-hash.c index c4ee3033..2ab7159e 100644 --- a/algo/blake/blake512-hash.c +++ b/algo/blake/blake512-hash.c @@ -475,11 +475,12 @@ void blake512_update(blake512_context *sc, const void *data, size_t len) void blake512_close( blake512_context *sc, void *dst ) { unsigned char buf[128] __attribute__((aligned(32))); - size_t ptr; + size_t ptr, k; unsigned bit_len; uint64_t th, tl; ptr = sc->ptr; + memcpy( buf, sc->buf, ptr ); bit_len = ((unsigned)ptr << 3); buf[ptr] = 0x80; tl = sc->T0 + bit_len; @@ -519,7 +520,8 @@ void blake512_close( blake512_context *sc, void *dst ) blake512_update( sc, buf, 128 ); } - v128_block_bswap64_512( dst, sc->H ); + for ( k = 0; k < 8; k ++ ) + ((uint64_t*)dst)[k] = bswap_64( sc->H[k] ); } void blake512_full( blake512_context *sc, void *dst, const void *data, diff --git a/algo/x16/minotaur.c b/algo/x16/minotaur.c index af196207..da97d218 100644 --- a/algo/x16/minotaur.c +++ b/algo/x16/minotaur.c @@ -20,13 +20,12 @@ #include "algo/whirlpool/sph_whirlpool.h" #include "algo/sha/sph_sha2.h" #include 
"algo/yespower/yespower.h" -//#if defined(__AES__) || defined(__ARM_FEATURE_AES) +#if defined(__AES__) || defined(__ARM_FEATURE_AES) #include "algo/echo/aes_ni/hash_api.h" #include "algo/groestl/aes_ni/hash-groestl.h" -//#else - #include "algo/echo/sph_echo.h" - #include "algo/groestl/sph_groestl.h" -//#endif +#endif +#include "algo/echo/sph_echo.h" +#include "algo/groestl/sph_groestl.h" #if defined(__AES__) #include "algo/fugue/fugue-aesni.h" #else @@ -48,12 +47,15 @@ typedef struct TortureGarden TortureGarden; struct TortureGarden { #if defined(__AES__) // || defined(__ARM_FEATURE_AES) - hashState_echo echo; hashState_groestl groestl; #else - sph_echo512_context echo; sph_groestl512_context groestl; #endif +#if defined(__AES__) || defined(__ARM_FEATURE_AES) + hashState_echo echo; +#else + sph_echo512_context echo; +#endif #if defined(__AES__) hashState_fugue fugue; #else @@ -93,9 +95,7 @@ static int get_hash( void *output, const void *input, TortureGarden *garden, switch ( algo ) { case 0: - blake512_init( &garden->blake ); - blake512_update( &garden->blake, input, 64 ); - blake512_close( &garden->blake, hash ); + blake512_full( &garden->blake, hash, input, 64 ); break; case 1: sph_bmw512_init( &garden->bmw ); @@ -107,7 +107,7 @@ static int get_hash( void *output, const void *input, TortureGarden *garden, cubehashUpdateDigest( &garden->cube, hash, input, 64 ); break; case 3: -#if defined(__AES__) // || defined(__ARM_FEATURE_AES) +#if defined(__AES__) || defined(__ARM_FEATURE_AES) echo_full( &garden->echo, hash, 512, input, 64 ); #else sph_echo512_init( &garden->echo ); diff --git a/configure b/configure index 95a00e24..94a78192 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.8. +# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.9. 
# # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -608,8 +608,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='23.8' -PACKAGE_STRING='cpuminer-opt 23.8' +PACKAGE_VERSION='23.9' +PACKAGE_STRING='cpuminer-opt 23.9' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 23.8 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 23.9 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1432,7 +1432,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 23.8:";; + short | recursive ) echo "Configuration of cpuminer-opt 23.9:";; esac cat <<\_ACEOF @@ -1538,7 +1538,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 23.8 +cpuminer-opt configure 23.9 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 23.8, which was +It was created by cpuminer-opt $as_me 23.9, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3593,7 +3593,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='23.8' + VERSION='23.9' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by cpuminer-opt $as_me 23.8, which was +This file was extended by cpuminer-opt $as_me 23.9, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -cpuminer-opt config.status 23.8 +cpuminer-opt config.status 23.9 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index caefba48..37033443 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [23.8]) +AC_INIT([cpuminer-opt], [23.9]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/configure~ b/configure~ index 38643133..95a00e24 100755 --- a/configure~ +++ b/configure~ @@ -657,8 +657,6 @@ JANSSON_LIBS LIBCURL_CPPFLAGS LIBCURL_CFLAGS LIBCURL -HAVE_MACOS_FALSE -HAVE_MACOS_TRUE MINGW_FALSE MINGW_TRUE ARCH_ARM_FALSE @@ -6881,14 +6879,6 @@ else MINGW_FALSE= fi - if test "x$OS" = "xAPPLE"; then - HAVE_MACOS_TRUE= - HAVE_MACOS_FALSE='#' -else - HAVE_MACOS_TRUE='#' - HAVE_MACOS_FALSE= -fi - if test x$request_jansson = xtrue ; then JANSSON_LIBS="compat/jansson/libjansson.a" @@ -7128,10 +7118,6 @@ if test -z "${MINGW_TRUE}" && test -z "${MINGW_FALSE}"; then as_fn_error $? "conditional \"MINGW\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -if test -z "${HAVE_MACOS_TRUE}" && test -z "${HAVE_MACOS_FALSE}"; then - as_fn_error $? "conditional \"HAVE_MACOS\" was never defined. -Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 -fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 diff --git a/simd-utils/simd-128.h b/simd-utils/simd-128.h index 90b9fb3f..787cd676 100644 --- a/simd-utils/simd-128.h +++ b/simd-utils/simd-128.h @@ -65,8 +65,6 @@ #define v128_add32 _mm_add_epi32 #define v128_add16 _mm_add_epi16 #define v128_add8 _mm_add_epi8 -#define v128_add4_64 mm128_add4_64 -#define v128_add4_32 mm128_add4_32 #define v128_sub64 _mm_sub_epi64 #define v128_sub32 _mm_sub_epi32 @@ -120,8 +118,8 @@ #define v128_xor _mm_xor_si128 #define v128_xorq _mm_xor_si128 #define v128_andnot _mm_andnot_si128 -#define v128_xnor( a, b ) mm128_not( _mm_xor_si128( a, b ) ) -#define v128_ornot( a, b ) mm128_or( a, mm128_not( b ) ) +#define v128_xnor( a, b ) v128_not( _mm_xor_si128( a, b ) ) +#define v128_ornot( a, b ) _mm_or_si128( a, v128_not( b ) ) // ternary #define v128_xorandnot( v2, v1, v0 ) \ @@ -135,13 +133,6 @@ #define v128_xoror( a, b, c ) _mm_xor_si128( a, _mm_or_si128( b, c ) ) #define v128_orand( a, b, c ) _mm_or_si128( a, _mm_and_si128( b, c ) ) -// shift 2 concatenated vectors right -#define v128_alignr64 mm128_alignr_64 -#define v128_alignr32 mm128_alignr_32 -#if defined(__SSSE3__) - #define v128_alignr8 _mm_alignr_epi8 -#endif - // unpack #define v128_unpacklo64 _mm_unpacklo_epi64 #define v128_unpackhi64 _mm_unpackhi_epi64 @@ -404,21 +395,24 @@ static inline __m128i mm128_negate_16( __m128i v ) // Add 4 values, fewer dependencies than sequential addition. 
-#define mm128_add4_64( a, b, c, d ) \ +#define v128_add4_64( a, b, c, d ) \ _mm_add_epi64( _mm_add_epi64( a, b ), _mm_add_epi64( c, d ) ) +#define mm128_add4_64 v128_add4_64 -#define mm128_add4_32( a, b, c, d ) \ +#define v128_add4_32( a, b, c, d ) \ _mm_add_epi32( _mm_add_epi32( a, b ), _mm_add_epi32( c, d ) ) -#define v128_add4_32 mm128_add4_32 +#define mm128_add4_32 v128_add4_32 -#define mm128_add4_16( a, b, c, d ) \ +#define v128_add4_16( a, b, c, d ) \ _mm_add_epi16( _mm_add_epi16( a, b ), _mm_add_epi16( c, d ) ) -#define mm128_add4_8( a, b, c, d ) \ +#define v128_add4_8( a, b, c, d ) \ _mm_add_epi8( _mm_add_epi8( a, b ), _mm_add_epi8( c, d ) ) -#define mm128_xor4( a, b, c, d ) \ +#define v128_xor4( a, b, c, d ) \ _mm_xor_si128( _mm_xor_si128( a, b ), _mm_xor_si128( c, d ) ) +#define mm128_xor4 v128_xor4 + // Memory functions // Mostly for convenience, avoids calculating bytes. @@ -984,18 +978,23 @@ static inline void mm128_block_bswap32_512( __m128i *d, const __m128i *s ) #if defined(__SSSE3__) -#define mm128_alignr_64( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*8 ) -#define mm128_alignr_32( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*4 ) +#define v128_alignr8 _mm_alignr_epi8 +#define v128_alignr64( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*8 ) +#define v128_alignr32( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*4 ) #else -#define mm128_alignr_64( hi, lo, c ) \ +#define v128_alignr64( hi, lo, c ) \ _mm_or_si128( _mm_slli_si128( hi, (c)*8 ), _mm_srli_si128( lo, (c)*8 ) ) -#define mm128_alignr_32( hi, lo, c ) \ +#define v128_alignr32( hi, lo, c ) \ _mm_or_si128( _mm_slli_si128( lo, (c)*4 ), _mm_srli_si128( hi, (c)*4 ) ) #endif +#define mm128_alignr_64 v128_alignr64 +#define mm128_alignr_32 v128_alignr32 +#define mm128_alignr_8 v128_alignr8 + // NEON only uses vector mask. x86 blend selects second arg when control bit // is set. Blendv selects second arg when sign bit is set. 
And masking is the diff --git a/simd-utils/simd-neon.h b/simd-utils/simd-neon.h index 671a47a3..9f08802c 100644 --- a/simd-utils/simd-neon.h +++ b/simd-utils/simd-neon.h @@ -336,27 +336,27 @@ static inline void v128_memcpy( void *dst, const void *src, const int n ) // Bit rotation #define v128_ror64( v, c ) \ - ( (c) == 32 ) ? (uint64x2_t)vrev64q_u32( ((uint64x2_t)v) ) \ + ( (c) == 32 ) ? (uint64x2_t)vrev64q_u32( ((uint32x4_t)v) ) \ : vsriq_n_u64( vshlq_n_u64( ((uint64x2_t)v), 64-c ), ((uint64x2_t)v), c ) #define v128_rol64( v, c ) \ - ( (c) == 32 ) ? (uint64x2_t)vrev64q_u32( ((uint64x2_t)v) ) \ + ( (c) == 32 ) ? (uint64x2_t)vrev64q_u32( ((uint32x4_t)v) ) \ : vsliq_n_u64( vshrq_n_u64( ((uint64x2_t)v), 64-c ), ((uint64x2_t)v), c ) #define v128_ror32( v, c ) \ - ( (c) == 16 ) ? (uint32x4_t)vrev32q_u16( ((uint32x4_t)v) ) \ + ( (c) == 16 ) ? (uint32x4_t)vrev32q_u16( ((uint16x8_t)v) ) \ : vsriq_n_u32( vshlq_n_u32( ((uint32x4_t)v), 32-c ), ((uint32x4_t)v), c ) #define v128_rol32( v, c ) \ - ( (c) == 16 ) ? (uint32x4_t)vrev32q_u16( ((uint32x4_t)v) ) \ + ( (c) == 16 ) ? (uint32x4_t)vrev32q_u16( ((uint16x8_t)v) ) \ : vsliq_n_u32( vshrq_n_u32( ((uint32x4_t)v), 32-c ), ((uint32x4_t)v), c ) #define v128_ror16( v, c ) \ - ( (c) == 8 ) ? (uint16x8_t)vrev16q_u8( ((uint16x8_t)v) ) \ + ( (c) == 8 ) ? (uint16x8_t)vrev16q_u8( ((uint8x16_t)v) ) \ : vsriq_n_u16( vshlq_n_u16( ((uint16x8_t)v), 16-c ), ((uint16x8_t)v), c ) #define v128_rol16( v, c ) \ - ( (c) == 8 ) ? (uint16x8_t)vrev16q_u8( ((uint16x8_t)v) ) \ + ( (c) == 8 ) ? (uint16x8_t)vrev16q_u8( ((uint8x16_t)v) ) \ : vsliq_n_u16( vshrq_n_u16( ((uint16x8_t)v), 16-c ), ((uint16x8_t)v), c ) #define v128_ror8( v, c ) \