diff --git a/CMakeLists.txt b/CMakeLists.txt index f9916fbbb..78443acb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,10 @@ if( VVENC_TARGET_ARCH STREQUAL "ARM" ) endif() # we enable x86 intrinsics for all target architectures, because they are implemented through simd-everywhere on non-x86 -set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "enable x86 intrinsics" ) -set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "enable ARM intrinsics" ) +set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "Enable x86 intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "Enable Arm Neon intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD_SVE FALSE CACHE BOOL "Enable Arm SVE intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE CACHE BOOL "Enable Arm SVE2 intrinsics" ) check_problematic_compiler( VVENC_PROBLEMATIC_COMPILER "MSVC" 19.38 19.39 ) if( VVENC_PROBLEMATIC_COMPILER ) @@ -72,12 +74,35 @@ if( VVENC_ENABLE_X86_SIMD ) endif() message( STATUS "x86 SIMD intrinsics enabled (using SIMDE for non-x86 targets)" ) - add_compile_definitions( TARGET_SIMD_X86 ) + add_compile_definitions( TARGET_SIMD_X86=1 ) endif() -if( VVENC_ENABLE_ARM_SIMD ) - message( STATUS "ARM SIMD intrinsics enabled" ) - add_compile_definitions( TARGET_SIMD_ARM ) +if( VVENC_TARGET_ARCH STREQUAL "ARM" ) + if( VVENC_ENABLE_ARM_SIMD ) + message( STATUS "Arm Neon intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM=1 ) + else() + message( STATUS "Arm Neon intrinsics disabled, disabling Arm SVE/SVE2 intrinsics" ) + # If Neon is disabled make sure that SVE/SVE2 are also disabled. + set( VVENC_ENABLE_ARM_SIMD_SVE FALSE ) + set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE ) + endif() + + if( VVENC_ENABLE_ARM_SIMD_SVE ) + message( STATUS "Arm SVE intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM_SVE=1 ) + else() + message( STATUS "Arm SVE intrinsics disabled, disabling Arm SVE2 intrinsics" ) + # If SVE is disabled make sure that SVE2 are also disabled. 
+ set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE ) + endif() + + if( VVENC_ENABLE_ARM_SIMD_SVE2 ) + message( STATUS "Arm SVE2 intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM_SVE2=1 ) + else() + message( STATUS "Arm SVE2 intrinsics disabled" ) + endif() endif() if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 3a83aeba4..c70f810e4 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -728,12 +728,14 @@ namespace x86_simd namespace arm_simd { #ifdef TARGET_SIMD_ARM - typedef enum - { - UNDEFINED = -1, - SCALAR = 0, - NEON, - } ARM_VEXT; +typedef enum +{ + UNDEFINED = -1, + SCALAR = 0, + NEON, + SVE, + SVE2, +} ARM_VEXT; #endif // TARGET_SIMD_ARM } // namespace arm_simd diff --git a/source/Lib/CommonLib/arm/CommonDefARM.cpp b/source/Lib/CommonLib/arm/CommonDefARM.cpp index a63e24225..06dd65492 100644 --- a/source/Lib/CommonLib/arm/CommonDefARM.cpp +++ b/source/Lib/CommonLib/arm/CommonDefARM.cpp @@ -40,25 +40,127 @@ POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------------------- */ -/** \file CommonDefX86.cpp -*/ +/** \file CommonDefARM.cpp + */ #include "CommonDefARM.h" +#if defined( __linux__ ) +#include <sys/auxv.h> // getauxval +#endif + namespace vvenc { using namespace arm_simd; +const static std::vector<std::pair<ARM_VEXT, std::string>> vext_names{ + { UNDEFINED, "" }, + { SCALAR, "SCALAR" }, + { NEON, "NEON" }, +#if TARGET_SIMD_ARM_SVE + { SVE, "SVE" }, +#endif +#if TARGET_SIMD_ARM_SVE2 + { SVE2, "SVE2" }, +#endif +}; + +const std::string& arm_vext_to_string( ARM_VEXT vext ) +{ + for( auto& it : vext_names ) + { + if( it.first == vext ) + { + return it.second; + } + } + THROW( "Invalid SIMD extension value " << vext ); +} + +ARM_VEXT string_to_arm_vext( const std::string& ext_name ) +{ + if( ext_name.empty() ) + { + return UNDEFINED; + } + + for( auto& it : vext_names ) + { + if( it.second == ext_name ) + { + return it.first; + } + } + + THROW( "Invalid SIMD Mode string: \"" << ext_name << "\"" ); +} + +#if defined( __linux__ ) + +// Define hwcap values ourselves: building with an old auxv header where these +// hwcap values are not defined should not prevent features from being enabled. +#define AARCH64_HWCAP_SVE ( 1 << 22 ) +#define AARCH64_HWCAP2_SVE2 ( 1 << 1 ) + +static ARM_VEXT _get_arm_extensions() +{ + // We assume Neon is always supported for relevant Arm processors. + ARM_VEXT ext = NEON; + +#if TARGET_SIMD_ARM_SVE + unsigned long hwcap = getauxval( AT_HWCAP ); +#endif +#if TARGET_SIMD_ARM_SVE2 + unsigned long hwcap2 = getauxval( AT_HWCAP2 ); +#endif + +#if TARGET_SIMD_ARM_SVE + if( hwcap & AARCH64_HWCAP_SVE ) + { + ext = SVE; +#if TARGET_SIMD_ARM_SVE2 + if( hwcap2 & AARCH64_HWCAP2_SVE2 ) + { + ext = SVE2; + } +#endif + } +#endif + + return ext; +} + +#else + +static ARM_VEXT _get_arm_extensions() +{ + // We assume Neon is always supported for relevant Arm processors. + // No other extensions supported on non-Linux platforms for now. 
+ return NEON; +} + +#endif + ARM_VEXT read_arm_extension_flags( ARM_VEXT request ) { - static ARM_VEXT ext_flags = NEON; // We assume NEON is always supported for relevant ARM processors + static ARM_VEXT max_supported = _get_arm_extensions(); + static ARM_VEXT ext_flags = max_supported; if( request != UNDEFINED ) { + if( request > max_supported ) + { + THROW( "requested SIMD level (" << request << ") not supported by current CPU (max " << max_supported << ")." ); + } ext_flags = request; } return ext_flags; }; +const std::string& read_arm_extension_name() +{ + return arm_vext_to_string( read_arm_extension_flags() ); +} + } // namespace diff --git a/source/Lib/CommonLib/arm/CommonDefARM.h b/source/Lib/CommonLib/arm/CommonDefARM.h index fa859ca3d..9fb430cba 100644 --- a/source/Lib/CommonLib/arm/CommonDefARM.h +++ b/source/Lib/CommonLib/arm/CommonDefARM.h @@ -58,8 +58,11 @@ namespace vvenc { using namespace arm_simd; -ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED ); -// std::string read_arm_extension_name(); +const std::string& arm_vext_to_string( ARM_VEXT vext ); +ARM_VEXT string_to_arm_vext( const std::string& ext_name ); + +ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED ); +const std::string& read_arm_extension_name(); } // namespace diff --git a/source/Lib/CommonLib/arm/InitARM.cpp b/source/Lib/CommonLib/arm/InitARM.cpp index c96994ff7..c51e03db3 100644 --- a/source/Lib/CommonLib/arm/InitARM.cpp +++ b/source/Lib/CommonLib/arm/InitARM.cpp @@ -70,13 +70,9 @@ namespace vvenc void InterpolationFilter::initInterpolationFilterARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initInterpolationFilterARM(); - break; - default: - break; } } #endif @@ -85,13 +81,9 @@ void InterpolationFilter::initInterpolationFilterARM() void PelBufferOps::initPelBufOpsARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: 
 _initPelBufOpsARM(); - break; - default: - break; } } #endif @@ -100,13 +92,9 @@ void PelBufferOps::initPelBufOpsARM() void RdCost::initRdCostARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initRdCostARM(); - break; - default: - break; } } #endif @@ -115,13 +103,9 @@ void RdCost::initRdCostARM() void MCTF::initMCTF_ARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initMCTF_ARM(); - break; - default: - break; } } #endif // ENABLE_SIMD_OPT_MCTF diff --git a/source/Lib/CommonLib/x86/CommonDefX86.cpp b/source/Lib/CommonLib/x86/CommonDefX86.cpp index 6b666d486..777a6ff00 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.cpp +++ b/source/Lib/CommonLib/x86/CommonDefX86.cpp @@ -79,7 +79,7 @@ const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SC # endif # endif // !REAL_TARGET_X86 -const std::string& vext_to_string( X86_VEXT vext ) +const std::string& x86_vext_to_string( X86_VEXT vext ) { try { @@ -91,7 +91,7 @@ const std::string& vext_to_string( X86_VEXT vext ) } } -X86_VEXT string_to_vext( const std::string& ext_name ) +X86_VEXT string_to_x86_vext( const std::string& ext_name ) { if( ext_name.empty() ) { @@ -287,7 +287,7 @@ X86_VEXT read_x86_extension_flags( X86_VEXT request ) const std::string& read_x86_extension_name() { - return vext_to_string( read_x86_extension_flags() ); + return x86_vext_to_string( read_x86_extension_flags() ); } } // namespace vvenc diff --git a/source/Lib/CommonLib/x86/CommonDefX86.h b/source/Lib/CommonLib/x86/CommonDefX86.h index 0fe698037..993ff44a8 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.h +++ b/source/Lib/CommonLib/x86/CommonDefX86.h @@ -87,8 +87,8 @@ namespace vvenc using namespace x86_simd; -const std::string& vext_to_string( X86_VEXT vext ); -X86_VEXT string_to_vext( const std::string& ext_name ); +const std::string& x86_vext_to_string( X86_VEXT vext ); +X86_VEXT string_to_x86_vext( const std::string& ext_name ); 
X86_VEXT read_x86_extension_flags( X86_VEXT request = x86_simd::UNDEFINED ); const std::string& read_x86_extension_name(); diff --git a/source/Lib/vvenc/vvencimpl.cpp b/source/Lib/vvenc/vvencimpl.cpp index 2151250f8..9c9b764d1 100644 --- a/source/Lib/vvenc/vvencimpl.cpp +++ b/source/Lib/vvenc/vvencimpl.cpp @@ -802,19 +802,37 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) try # endif // HANDLE_EXCEPTION { - X86_VEXT request_ext = string_to_vext( simdReqStr ); +#if defined( REAL_TARGET_ARM ) + ARM_VEXT arm_ext = string_to_arm_vext( simdReqStr ); + X86_VEXT x86_ext = arm_ext == arm_simd::UNDEFINED ? x86_simd::UNDEFINED + : arm_ext == arm_simd::SCALAR ? x86_simd::SCALAR + : SIMD_EVERYWHERE_EXTENSION_LEVEL; + try + { + read_x86_extension_flags( x86_ext ); + read_arm_extension_flags( arm_ext ); + } + catch( Exception& ) + { + // Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of + // the enum. + THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " + << read_arm_extension_name() << ")." ); + } +#else + X86_VEXT request_ext = string_to_x86_vext( simdReqStr ); try { read_x86_extension_flags( request_ext ); -#if defined( TARGET_SIMD_ARM ) - read_arm_extension_flags( request_ext == x86_simd::UNDEFINED ? arm_simd::UNDEFINED : request_ext != x86_simd::SCALAR ? arm_simd::NEON : arm_simd::SCALAR ); -#endif } catch( Exception& ) { - // not using the actual message from the exception here, because we need to insert the SIMD-level name instead of the enum - THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " << read_x86_extension_name() << ")." ); + // Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of + // the enum. + THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " + << read_x86_extension_name() << ")." 
); } +#endif #if ENABLE_SIMD_OPT_BUFFER #if defined( TARGET_SIMD_X86 ) @@ -825,10 +843,14 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) #endif #endif #if ENABLE_SIMD_TRAFO - g_tCoeffOps.initTCoeffOpsX86(); + g_tCoeffOps.initTCoeffOpsX86(); #endif +#if defined( REAL_TARGET_ARM ) + return read_arm_extension_name().c_str(); +#else return read_x86_extension_name().c_str(); +#endif } #if HANDLE_EXCEPTION catch( Exception& e ) @@ -863,13 +885,15 @@ std::string VVEncImpl::getCompileInfoString() std::string VVEncImpl::createEncoderInfoStr() { std::stringstream cssCap; -#if defined( TARGET_SIMD_X86 ) - setSIMDExtension( nullptr ); // ensure SIMD-detection is finished +#if defined( TARGET_SIMD_ARM ) + setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished + cssCap << getCompileInfoString() << "[SIMD=" << read_arm_extension_name() << "]"; +#elif defined( TARGET_SIMD_X86 ) + setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished cssCap << getCompileInfoString() << "[SIMD=" << read_x86_extension_name() <<"]"; -#else // !TARGET_SIMD_X86 +#else // !TARGET_SIMD_X86 && !TARGET_SIMD_ARM cssCap << getCompileInfoString() << "[SIMD=SCALAR]"; -#endif // !TARGET_SIMD_X86 - +#endif std::string cInfoStr; cInfoStr = "VVenC, the Fraunhofer H.266/VVC Encoder, version " VVENC_VERSION;