From f26d46386e46494c4e7b20dd55bbc89ad50810c2 Mon Sep 17 00:00:00 2001 From: George Steed Date: Tue, 29 Oct 2024 15:15:45 +0000 Subject: [PATCH] Rework setSIMDExtension to compile with partial SIMD enabled (#455) * Rework setSIMDExtension to compile with partial SIMD enabled There are existing options to enable both Arm native and SIMDe-based intrinsics implementations; however, if only one of these is enabled, compilation for Arm targets currently fails. Fix this by adjusting the existing #ifdef guards and adding new ones to cover the previously failing option combinations. Also stop checking REAL_TARGET_ARM since it does not distinguish which SIMD targets are enabled. * Disable RdCost Neon code unless TARGET_SIMD_X86 is enabled The existing Neon code in RdCostARM.h depends on being able to call into the existing SIMDe-based x86 kernels. If VVENC_ENABLE_X86_SIMD is disabled in the CMake configuration, then these kernels are unavailable and compilation fails. Until native Neon kernels are added for the missing functions, simply disable the optimised RdCost Neon code unless the SIMDe kernels are also available. --- source/Lib/CommonLib/arm/RdCostARM.h | 34 ++++++++++++++++++---------- source/Lib/vvenc/vvencimpl.cpp | 34 +++++++++++++++++----------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/source/Lib/CommonLib/arm/RdCostARM.h b/source/Lib/CommonLib/arm/RdCostARM.h index f15da5dfc..8536fabfb 100644 --- a/source/Lib/CommonLib/arm/RdCostARM.h +++ b/source/Lib/CommonLib/arm/RdCostARM.h @@ -51,12 +51,14 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "CommonLib/CommonDef.h" #include "../RdCost.h" -#if SIMD_EVERYWHERE_EXTENSION_LEVEL_ID==X86_SIMD_AVX2 -# define USE_AVX2 -#elif SIMD_EVERYWHERE_EXTENSION_LEVEL_ID==X86_SIMD_SSE42 -# define USE_SSE42 -#elif SIMD_EVERYWHERE_EXTENSION_LEVEL_ID==X86_SIMD_SSE41 -# define USE_SSE41 +#if defined( TARGET_SIMD_X86 ) +#if SIMD_EVERYWHERE_EXTENSION_LEVEL_ID == X86_SIMD_AVX2 +#define USE_AVX2 +#elif SIMD_EVERYWHERE_EXTENSION_LEVEL_ID == X86_SIMD_SSE42 +#define USE_SSE42 +#elif SIMD_EVERYWHERE_EXTENSION_LEVEL_ID == X86_SIMD_SSE41 +#define USE_SSE41 +#endif #endif #ifdef TARGET_SIMD_X86 @@ -66,8 +68,12 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvenc { -#ifdef TARGET_SIMD_ARM -#if __ARM_ARCH >= 8 +#if defined( TARGET_SIMD_ARM ) +#if REAL_TARGET_AARCH64 + +// The xGetHADs_ARMSIMD functions depend on the SIMDe kernels being enabled +// during compilation. +#if defined( TARGET_SIMD_X86 ) //working up to 12-bit static uint32_t xCalcHAD16x16_fast_Neon( const Pel *piOrg, const Pel *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) @@ -899,6 +905,7 @@ Distortion RdCost::xGetHAD2SADs_ARMSIMD( const DistParam &rcDtParam ) return std::min( distHad, 2*distSad); } +#endif // defined( TARGET_SIMD_X86 ) template void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost ) @@ -993,7 +1000,9 @@ void RdCost::xGetSADX5_16xN_SIMD(const DistParam& rcDtParam, Distortion* cost, b template void RdCost::_initRdCostARM() { - m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD; + m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD; + +#if defined( TARGET_SIMD_X86 ) m_afpDistortFunc[0][DF_HAD_2SAD ] = RdCost::xGetHAD2SADs_ARMSIMD; m_afpDistortFunc[0][DF_HAD] = RdCost::xGetHADs_ARMSIMD; @@ -1013,18 +1022,19 @@ void RdCost::_initRdCostARM() m_afpDistortFunc[0][DF_HAD32_fast] = RdCost::xGetHADs_ARMSIMD; m_afpDistortFunc[0][DF_HAD64_fast] = RdCost::xGetHADs_ARMSIMD; m_afpDistortFunc[0][DF_HAD128_fast] = RdCost::xGetHADs_ARMSIMD; +#endif // defined( TARGET_SIMD_X86 ) } -#else // 
!__ARM_ARCH >= 8 +#else // !REAL_TARGET_AARCH64 template void RdCost::_initRdCostARM() {} -#endif // !__ARM_ARCH >= 8 +#endif // REAL_TARGET_AARCH64 template void RdCost::_initRdCostARM(); -#endif // TARGET_SIMD_ARM +#endif // defined( TARGET_SIMD_ARM ) } // namespace vvenc diff --git a/source/Lib/vvenc/vvencimpl.cpp b/source/Lib/vvenc/vvencimpl.cpp index 9c9b764d1..ca09b6dff 100644 --- a/source/Lib/vvenc/vvencimpl.cpp +++ b/source/Lib/vvenc/vvencimpl.cpp @@ -797,19 +797,24 @@ void VVEncImpl::registerMsgCbf( void * ctx, vvencLoggingCallback msgFnc ) const char* VVEncImpl::setSIMDExtension( const char* simdId ) { const std::string simdReqStr( simdId ? simdId : "" ); -#if defined( TARGET_SIMD_X86 ) -# if HANDLE_EXCEPTION +#if defined( TARGET_SIMD_X86 ) || defined( TARGET_SIMD_ARM ) +#if HANDLE_EXCEPTION try -# endif // HANDLE_EXCEPTION +#endif // HANDLE_EXCEPTION { -#if defined( REAL_TARGET_ARM ) +#if defined( TARGET_SIMD_ARM ) ARM_VEXT arm_ext = string_to_arm_vext( simdReqStr ); +#if defined( TARGET_SIMD_X86 ) + // Translate any non-scalar Arm SIMD request to enable SIMDe. X86_VEXT x86_ext = arm_ext == arm_simd::UNDEFINED ? x86_simd::UNDEFINED : arm_ext == arm_simd::SCALAR ? x86_simd::SCALAR : SIMD_EVERYWHERE_EXTENSION_LEVEL; +#endif try { +#if defined( TARGET_SIMD_X86 ) read_x86_extension_flags( x86_ext ); +#endif read_arm_extension_flags( arm_ext ); } catch( Exception& ) @@ -819,7 +824,7 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " << read_arm_extension_name() << ")." 
); } -#else +#else // defined( TARGET_SIMD_X86 ) X86_VEXT request_ext = string_to_x86_vext( simdReqStr ); try { @@ -835,18 +840,21 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) #endif #if ENABLE_SIMD_OPT_BUFFER - #if defined( TARGET_SIMD_X86 ) +#if defined( TARGET_SIMD_X86 ) g_pelBufOP.initPelBufOpsX86(); - #endif - #if defined( TARGET_SIMD_ARM ) +#endif +#if defined( TARGET_SIMD_ARM ) g_pelBufOP.initPelBufOpsARM(); - #endif #endif +#endif // ENABLE_SIMD_OPT_BUFFER + #if ENABLE_SIMD_TRAFO +#if defined( TARGET_SIMD_X86 ) g_tCoeffOps.initTCoeffOpsX86(); #endif +#endif // ENABLE_SIMD_TRAFO -#if defined( REAL_TARGET_ARM ) +#if defined( TARGET_SIMD_ARM ) return read_arm_extension_name().c_str(); #else return read_x86_extension_name().c_str(); @@ -859,8 +867,8 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) msg.log( VVENC_ERROR, "\n%s\n", e.what() ); return nullptr; } -#endif // HANDLE_EXCEPTION -#else // !TARGET_SIMD_X86 +#endif // HANDLE_EXCEPTION +#else // !defined( TARGET_SIMD_X86 ) && !defined( TARGET_SIMD_ARM ) if( !simdReqStr.empty() && simdReqStr != "SCALAR" ) { MsgLog msg; @@ -868,7 +876,7 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId ) return nullptr; } return "SCALAR"; -#endif // TARGET_SIMD_X86 +#endif // defined( TARGET_SIMD_X86 ) || defined( TARGET_SIMD_ARM ) } ///< creates compile info string containing OS, Compiler and Bit-depth (e.g. 32 or 64 bit).