From a2fa31990fb2193eecd343ea976fd7eec0c6c8a6 Mon Sep 17 00:00:00 2001 From: George Steed Date: Fri, 18 Oct 2024 15:56:42 +0100 Subject: [PATCH] Add AArch64 extension handling and SVE/SVE2 feature detection (#443) * Add Arm extension handling in preparation for feature detection This commit continues to assume that only Neon is present, but adds the helper functions and call sites to match the existing x86 behaviour in preparation for adding feature detection logic in a later commit. * Add SVE and SVE2 to Arm extensions enum Plus wire up Linux feature detection and amend init switches to just fall back to the Neon cases for now. * Guard AArch64 SVE/SVE2 features by new CMake options Introduce new options VVENC_ENABLE_ARM_SIMD_SVE and VVENC_ENABLE_ARM_SIMD_SVE2 to control whether SVE and SVE2 are enabled, plus add #if guards to disable feature detection if the feature is not available. This commit does not include guarding which source files are actually built with SVE/SVE2 flags enabled since there are currently zero SVE/SVE2 source files. 
--- CMakeLists.txt | 37 ++++++-- source/Lib/CommonLib/CommonDef.h | 14 +-- source/Lib/CommonLib/arm/CommonDefARM.cpp | 108 +++++++++++++++++++++- source/Lib/CommonLib/arm/CommonDefARM.h | 7 +- source/Lib/CommonLib/arm/InitARM.cpp | 24 +---- source/Lib/CommonLib/x86/CommonDefX86.cpp | 6 +- source/Lib/CommonLib/x86/CommonDefX86.h | 4 +- source/Lib/vvenc/vvencimpl.cpp | 48 +++++++--- 8 files changed, 194 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f9916fbbb..78443acb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,10 @@ if( VVENC_TARGET_ARCH STREQUAL "ARM" ) endif() # we enable x86 intrinsics for all target architectures, because they are implemented through simd-everywhere on non-x86 -set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "enable x86 intrinsics" ) -set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "enable ARM intrinsics" ) +set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "Enable x86 intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "Enable Arm Neon intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD_SVE FALSE CACHE BOOL "Enable Arm SVE intrinsics" ) +set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE CACHE BOOL "Enable Arm SVE2 intrinsics" ) check_problematic_compiler( VVENC_PROBLEMATIC_COMPILER "MSVC" 19.38 19.39 ) if( VVENC_PROBLEMATIC_COMPILER ) @@ -72,12 +74,35 @@ if( VVENC_ENABLE_X86_SIMD ) endif() message( STATUS "x86 SIMD intrinsics enabled (using SIMDE for non-x86 targets)" ) - add_compile_definitions( TARGET_SIMD_X86 ) + add_compile_definitions( TARGET_SIMD_X86=1 ) endif() -if( VVENC_ENABLE_ARM_SIMD ) - message( STATUS "ARM SIMD intrinsics enabled" ) - add_compile_definitions( TARGET_SIMD_ARM ) +if( VVENC_TARGET_ARCH STREQUAL "ARM" ) + if( VVENC_ENABLE_ARM_SIMD ) + message( STATUS "Arm Neon intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM=1 ) + else() + message( STATUS "Arm Neon intrinsics disabled, disabling Arm SVE/SVE2 intrinsics" ) + # If Neon is disabled make sure 
that SVE/SVE2 are also disabled. + set( VVENC_ENABLE_ARM_SIMD_SVE FALSE ) + set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE ) + endif() + + if( VVENC_ENABLE_ARM_SIMD_SVE ) + message( STATUS "Arm SVE intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM_SVE=1 ) + else() + message( STATUS "Arm SVE intrinsics disabled, disabling Arm SVE2 intrinsics" ) + # If SVE is disabled make sure that SVE2 are also disabled. + set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE ) + endif() + + if( VVENC_ENABLE_ARM_SIMD_SVE2 ) + message( STATUS "Arm SVE2 intrinsics enabled" ) + add_compile_definitions( TARGET_SIMD_ARM_SVE2=1 ) + else() + message( STATUS "Arm SVE2 intrinsics disabled" ) + endif() endif() if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 3a83aeba4..c70f810e4 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -728,12 +728,14 @@ namespace x86_simd namespace arm_simd { #ifdef TARGET_SIMD_ARM - typedef enum - { - UNDEFINED = -1, - SCALAR = 0, - NEON, - } ARM_VEXT; +typedef enum +{ + UNDEFINED = -1, + SCALAR = 0, + NEON, + SVE, + SVE2, +} ARM_VEXT; #endif // TARGET_SIMD_ARM } // namespace arm_simd diff --git a/source/Lib/CommonLib/arm/CommonDefARM.cpp b/source/Lib/CommonLib/arm/CommonDefARM.cpp index a63e24225..06dd65492 100644 --- a/source/Lib/CommonLib/arm/CommonDefARM.cpp +++ b/source/Lib/CommonLib/arm/CommonDefARM.cpp @@ -40,25 +40,127 @@ POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------------------- */ -/** \file CommonDefX86.cpp -*/ +/** \file CommonDefARM.cpp + */ #include "CommonDefARM.h" +#if defined( __linux__ ) +#include <sys/auxv.h> // getauxval +#endif + namespace vvenc { using namespace arm_simd; +const static std::vector<std::pair<ARM_VEXT, std::string>> vext_names{ + { UNDEFINED, "" }, + { SCALAR, "SCALAR" }, + { NEON, "NEON" }, +#if TARGET_SIMD_ARM_SVE + { SVE, "SVE" }, +#endif +#if TARGET_SIMD_ARM_SVE2 + { SVE2, "SVE2" }, +#endif +}; + +const std::string& arm_vext_to_string( ARM_VEXT vext ) +{ + for( auto& it : vext_names ) + { + if( it.first == vext ) + { + return it.second; + } + } + THROW( "Invalid SIMD extension value " << vext ); +} + +ARM_VEXT string_to_arm_vext( const std::string& ext_name ) +{ + if( ext_name.empty() ) + { + return UNDEFINED; + } + + for( auto& it : vext_names ) + { + if( it.second == ext_name ) + { + return it.first; + } + } + + THROW( "Invalid SIMD Mode string: \"" << ext_name << "\"" ); +} + +#if defined( __linux__ ) + +// Define hwcap values ourselves: building with an old auxv header where these +// hwcap values are not defined should not prevent features from being enabled. +#define AARCH64_HWCAP_SVE ( 1 << 22 ) +#define AARCH64_HWCAP2_SVE2 ( 1 << 1 ) + +static ARM_VEXT _get_arm_extensions() +{ + // We assume Neon is always supported for relevant Arm processors. + ARM_VEXT ext = NEON; + +#if TARGET_SIMD_ARM_SVE + unsigned long hwcap = getauxval( AT_HWCAP ); +#endif +#if TARGET_SIMD_ARM_SVE2 + unsigned long hwcap2 = getauxval( AT_HWCAP2 ); +#endif + +#if TARGET_SIMD_ARM_SVE + if( hwcap & AARCH64_HWCAP_SVE ) + { + ext = SVE; +#if TARGET_SIMD_ARM_SVE2 + if( hwcap2 & AARCH64_HWCAP2_SVE2 ) + { + ext = SVE2; + } +#endif + } +#endif + + return ext; +} + +#else + +static ARM_VEXT _get_arm_extensions() +{ + // We assume Neon is always supported for relevant Arm processors. + // No other extensions supported on non-Linux platforms for now. 
+ return NEON; +} + +#endif + ARM_VEXT read_arm_extension_flags( ARM_VEXT request ) { - static ARM_VEXT ext_flags = NEON; // We assume NEON is always supported for relevant ARM processors + static ARM_VEXT max_supported = _get_arm_extensions(); + static ARM_VEXT ext_flags = max_supported; if( request != UNDEFINED ) { + if( request > max_supported ) + { + THROW( "requested SIMD level (" << request << ") not supported by current CPU (max " << max_supported << ")." ); + } ext_flags = request; } return ext_flags; }; +const std::string& read_arm_extension_name() +{ + return arm_vext_to_string( read_arm_extension_flags() ); +} + } // namespace diff --git a/source/Lib/CommonLib/arm/CommonDefARM.h b/source/Lib/CommonLib/arm/CommonDefARM.h index fa859ca3d..9fb430cba 100644 --- a/source/Lib/CommonLib/arm/CommonDefARM.h +++ b/source/Lib/CommonLib/arm/CommonDefARM.h @@ -58,8 +58,11 @@ namespace vvenc { using namespace arm_simd; -ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED ); -// std::string read_arm_extension_name(); +const std::string& arm_vext_to_string( ARM_VEXT vext ); +ARM_VEXT string_to_arm_vext( const std::string& ext_name ); + +ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED ); +const std::string& read_arm_extension_name(); } // namespace diff --git a/source/Lib/CommonLib/arm/InitARM.cpp b/source/Lib/CommonLib/arm/InitARM.cpp index c96994ff7..c51e03db3 100644 --- a/source/Lib/CommonLib/arm/InitARM.cpp +++ b/source/Lib/CommonLib/arm/InitARM.cpp @@ -70,13 +70,9 @@ namespace vvenc void InterpolationFilter::initInterpolationFilterARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initInterpolationFilterARM(); - break; - default: - break; } } #endif @@ -85,13 +81,9 @@ void InterpolationFilter::initInterpolationFilterARM() void PelBufferOps::initPelBufOpsARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: 
_initPelBufOpsARM(); - break; - default: - break; } } #endif @@ -100,13 +92,9 @@ void PelBufferOps::initPelBufOpsARM() void RdCost::initRdCostARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initRdCostARM(); - break; - default: - break; } } #endif @@ -115,13 +103,9 @@ void RdCost::initRdCostARM() void MCTF::initMCTF_ARM() { auto vext = read_arm_extension_flags(); - switch( vext ) + if( vext >= NEON ) { - case NEON: _initMCTF_ARM(); - break; - default: - break; } } #endif // ENABLE_SIMD_OPT_MCTF diff --git a/source/Lib/CommonLib/x86/CommonDefX86.cpp b/source/Lib/CommonLib/x86/CommonDefX86.cpp index 6b666d486..777a6ff00 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.cpp +++ b/source/Lib/CommonLib/x86/CommonDefX86.cpp @@ -79,7 +79,7 @@ const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SC # endif # endif // !REAL_TARGET_X86 -const std::string& vext_to_string( X86_VEXT vext ) +const std::string& x86_vext_to_string( X86_VEXT vext ) { try { @@ -91,7 +91,7 @@ const std::string& vext_to_string( X86_VEXT vext ) } } -X86_VEXT string_to_vext( const std::string& ext_name ) +X86_VEXT string_to_x86_vext( const std::string& ext_name ) { if( ext_name.empty() ) { @@ -287,7 +287,7 @@ X86_VEXT read_x86_extension_flags( X86_VEXT request ) const std::string& read_x86_extension_name() { - return vext_to_string( read_x86_extension_flags() ); + return x86_vext_to_string( read_x86_extension_flags() ); } } // namespace vvenc diff --git a/source/Lib/CommonLib/x86/CommonDefX86.h b/source/Lib/CommonLib/x86/CommonDefX86.h index 0fe698037..993ff44a8 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.h +++ b/source/Lib/CommonLib/x86/CommonDefX86.h @@ -87,8 +87,8 @@ namespace vvenc using namespace x86_simd; -const std::string& vext_to_string( X86_VEXT vext ); -X86_VEXT string_to_vext( const std::string& ext_name ); +const std::string& x86_vext_to_string( X86_VEXT vext ); +X86_VEXT string_to_x86_vext( const std::string& ext_name ); 
X86_VEXT read_x86_extension_flags( X86_VEXT request = x86_simd::UNDEFINED ); const std::string& read_x86_extension_name(); diff --git a/source/Lib/vvenc/vvencimpl.cpp b/source/Lib/vvenc/vvencimpl.cpp index 2151250f8..9c9b764d1 100644 --- a/source/Lib/vvenc/vvencimpl.cpp +++ b/source/Lib/vvenc/vvencimpl.cpp @@ -802,19 +802,37 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) try # endif // HANDLE_EXCEPTION { - X86_VEXT request_ext = string_to_vext( simdReqStr ); +#if defined( REAL_TARGET_ARM ) + ARM_VEXT arm_ext = string_to_arm_vext( simdReqStr ); + X86_VEXT x86_ext = arm_ext == arm_simd::UNDEFINED ? x86_simd::UNDEFINED + : arm_ext == arm_simd::SCALAR ? x86_simd::SCALAR + : SIMD_EVERYWHERE_EXTENSION_LEVEL; + try + { + read_x86_extension_flags( x86_ext ); + read_arm_extension_flags( arm_ext ); + } + catch( Exception& ) + { + // Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of + // the enum. + THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " + << read_arm_extension_name() << ")." ); + } +#else + X86_VEXT request_ext = string_to_x86_vext( simdReqStr ); try { read_x86_extension_flags( request_ext ); -#if defined( TARGET_SIMD_ARM ) - read_arm_extension_flags( request_ext == x86_simd::UNDEFINED ? arm_simd::UNDEFINED : request_ext != x86_simd::SCALAR ? arm_simd::NEON : arm_simd::SCALAR ); -#endif } catch( Exception& ) { - // not using the actual message from the exception here, because we need to insert the SIMD-level name instead of the enum - THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " << read_x86_extension_name() << ")." ); + // Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of + // the enum. + THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " + << read_x86_extension_name() << ")." 
); } +#endif #if ENABLE_SIMD_OPT_BUFFER #if defined( TARGET_SIMD_X86 ) @@ -825,10 +843,14 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) #endif #endif #if ENABLE_SIMD_TRAFO - g_tCoeffOps.initTCoeffOpsX86(); + g_tCoeffOps.initTCoeffOpsX86(); #endif +#if defined( REAL_TARGET_ARM ) + return read_arm_extension_name().c_str(); +#else return read_x86_extension_name().c_str(); +#endif } #if HANDLE_EXCEPTION catch( Exception& e ) @@ -863,13 +885,15 @@ std::string VVEncImpl::getCompileInfoString() std::string VVEncImpl::createEncoderInfoStr() { std::stringstream cssCap; -#if defined( TARGET_SIMD_X86 ) - setSIMDExtension( nullptr ); // ensure SIMD-detection is finished +#if defined( TARGET_SIMD_ARM ) + setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished + cssCap << getCompileInfoString() << "[SIMD=" << read_arm_extension_name() << "]"; +#elif defined( TARGET_SIMD_X86 ) + setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished cssCap << getCompileInfoString() << "[SIMD=" << read_x86_extension_name() <<"]"; -#else // !TARGET_SIMD_X86 +#else // !TARGET_SIMD_X86 && !TARGET_SIMD_ARM cssCap << getCompileInfoString() << "[SIMD=SCALAR]"; -#endif // !TARGET_SIMD_X86 - +#endif std::string cInfoStr; cInfoStr = "VVenC, the Fraunhofer H.266/VVC Encoder, version " VVENC_VERSION;