Skip to content

Commit

Permalink
trying to fix avx build failure on nix compiler
Browse files Browse the repository at this point in the history
  • Loading branch information
revelator authored Nov 18, 2024
1 parent 8dcbdc2 commit d0fc68c
Show file tree
Hide file tree
Showing 6 changed files with 7,808 additions and 7,506 deletions.
6 changes: 6 additions & 0 deletions neo/idlib/math/Simd_AVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ If you have questions concerning this license or the applicable additional terms
//
//===============================================================

// Revelator: these work with gcc, clang or msvc on x86 or x64 (no inline assembly used)
#if defined(_MSC_VER) && ( defined(_M_X64) || defined(_M_IX86) ) || \
defined(__GNUC__) && ( defined(__i386__) || defined (__x86_64__) ) && defined(__AVX__)

#include <immintrin.h>

#include "idlib/geometry/DrawVert.h"
Expand Down Expand Up @@ -122,3 +126,5 @@ void VPCALL idSIMD_AVX::CullByFrustum2( idDrawVert *verts, const int numVerts, c
}
_mm256_zeroupper();
}

#endif /* _MSC_VER */
7 changes: 6 additions & 1 deletion neo/idlib/math/Simd_AVX.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@

// idSIMD_AVX extends idSIMD_SSE3. When the preprocessor guard below holds it
// declares the virtual methods GetName, CullByFrustum and CullByFrustum2;
// when the guard does not hold, the class body declares nothing of its own.
class idSIMD_AVX : public idSIMD_SSE3 {
public:
// Revelator: these work with gcc, clang or msvc on x86 or x64 (no inline assembly used)
// Guard semantics: && binds tighter than ||, so the condition reads as
//   ( MSVC targeting x86/x64 ) || ( __GNUC__ compiler targeting x86/x64 with __AVX__ defined ).
// The __AVX__ requirement therefore applies to the __GNUC__ branch only.
#if defined(_MSC_VER) && ( defined(_M_X64) || defined(_M_IX86) ) || \
defined(__GNUC__) && ( defined(__i386__) || defined (__x86_64__) ) && defined(__AVX__)

// NOTE(review): presumably these override same-named virtuals reachable through
// idSIMD_SSE3, so callers fall back to the base versions when the guard is false -- confirm.
virtual const char *VPCALL GetName( void ) const;
// Takes numVerts vertices, six frustum planes and an epsilon; results go to pointCull (byte per entry).
virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
// Same inputs as CullByFrustum, but pointCull holds unsigned short entries.
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );

#endif /* MSVC x86/x64, or GNUC x86/x64 with __AVX__ */
};

#endif /* !__MATH_SIMD_AVX_H__ */
30 changes: 15 additions & 15 deletions neo/idlib/math/Simd_AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ If you have questions concerning this license or the applicable additional terms
===========================================================================
*/

#include "sys/platform.h"
#include "sys/platform.h"
#include "Simd_AVX2.h"

//===============================================================
Expand All @@ -35,6 +35,10 @@ If you have questions concerning this license or the applicable additional terms
//
//===============================================================

// Revelator: these work with gcc, clang or msvc on x86 or x64 (no inline assembly used)
#if defined(_MSC_VER) && ( defined(_M_X64) || defined(_M_IX86) ) || \
defined(__GNUC__) && ( defined(__i386__) || defined (__x86_64__) ) && defined(__AVX2__)

#include <immintrin.h>

#include "idlib/geometry/DrawVert.h"
Expand Down Expand Up @@ -62,20 +66,17 @@ void VPCALL idSIMD_AVX2::CullByFrustum( idDrawVert *verts, const int numVerts, c
const __m256 fC = _mm256_set_ps( 0, 0, frustum[5][2], frustum[4][2], frustum[3][2], frustum[2][2], frustum[1][2], frustum[0][2] );
const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
const __m256 eps = _mm256_set1_ps( epsilon );
const byte mask6 = (1 << 6) - 1;
const byte mask6 = ( 1 << 6 ) - 1;

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
__m256 vY = _mm256_set1_ps( vec.y );
__m256 vZ = _mm256_set1_ps( vec.z );
__m256 d = _mm256_fmadd_ps( fA, vX,
_mm256_fmadd_ps( fB, vY,
_mm256_fmadd_ps( fC, vZ, fD )
)
);
__m256 d = _mm256_fmadd_ps( fA, vX, _mm256_fmadd_ps( fB, vY,
_mm256_fmadd_ps( fC, vZ, fD ) ) );
int mask_lo = _mm256_movemask_ps( _mm256_cmp_ps( d, eps, _CMP_LT_OQ ) );
pointCull[j] = (byte)mask_lo & mask6;
pointCull[j] = ( byte )mask_lo & mask6;
}
_mm256_zeroupper();
}
Expand All @@ -92,21 +93,20 @@ void VPCALL idSIMD_AVX2::CullByFrustum2( idDrawVert *verts, const int numVerts,
const __m256 fD = _mm256_set_ps( 0, 0, frustum[5][3], frustum[4][3], frustum[3][3], frustum[2][3], frustum[1][3], frustum[0][3] );
const __m256 eps = _mm256_set1_ps( epsilon );
static const __m256 epsM = _mm256_set1_ps( -epsilon );
const short mask6 = (1 << 6) - 1;
const short mask6 = ( 1 << 6 ) - 1;

for ( int j = 0; j < numVerts; j++ ) {
idVec3 &vec = verts[j].xyz;
__m256 vX = _mm256_set1_ps( vec.x );
__m256 vY = _mm256_set1_ps( vec.y );
__m256 vZ = _mm256_set1_ps( vec.z );
__m256 d = _mm256_fmadd_ps( fA, vX,
_mm256_fmadd_ps( fB, vY,
_mm256_fmadd_ps( fC, vZ, fD )
)
);
__m256 d = _mm256_fmadd_ps( fA, vX, _mm256_fmadd_ps( fB, vY,
_mm256_fmadd_ps( fC, vZ, fD ) ) );
int mask_lo = _mm256_movemask_ps( _mm256_cmp_ps( d, eps, _CMP_LT_OQ ) );
int mask_hi = _mm256_movemask_ps( _mm256_cmp_ps( d, eps, _CMP_GT_OQ ) );
pointCull[j] = (unsigned short)(mask_lo & mask6 | (mask_hi & mask6) << 6);
pointCull[j] = ( unsigned short )( mask_lo & mask6 | ( mask_hi & mask6 ) << 6 );
}
_mm256_zeroupper();
}

#endif
7 changes: 6 additions & 1 deletion neo/idlib/math/Simd_AVX2.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@

// idSIMD_AVX2 extends idSIMD_AVX. When the preprocessor guard below holds it
// declares the virtual methods GetName, CullByFrustum and CullByFrustum2;
// when the guard does not hold, the class body declares nothing of its own.
class idSIMD_AVX2 : public idSIMD_AVX {
public:
// Revelator: these work with gcc, clang or msvc on x86 or x64 (no inline assembly used)
// Guard semantics: && binds tighter than ||, so the condition reads as
//   ( MSVC targeting x86/x64 ) || ( __GNUC__ compiler targeting x86/x64 with __AVX2__ defined ).
// The __AVX2__ requirement therefore applies to the __GNUC__ branch only.
#if defined(_MSC_VER) && ( defined(_M_X64) || defined(_M_IX86) ) || \
defined(__GNUC__) && ( defined(__i386__) || defined (__x86_64__) ) && defined(__AVX2__)

// NOTE(review): presumably these override same-named virtuals reachable through
// idSIMD_AVX, so callers fall back to the base versions when the guard is false -- confirm.
virtual const char *VPCALL GetName( void ) const;
// Per the Simd_AVX2.cpp hunk in this commit: writes one 6-bit mask per vertex to pointCull[j],
// built from a "plane distance < epsilon" comparison across the six frustum planes.
virtual void VPCALL CullByFrustum( idDrawVert *verts, const int numVerts, const idPlane frustum[6], byte *pointCull, float epsilon );
// Per the Simd_AVX2.cpp hunk in this commit: packs two 6-bit masks per vertex into pointCull[j]
// (bits 0-5 from a less-than comparison, bits 6-11 from a greater-than comparison).
virtual void VPCALL CullByFrustum2( idDrawVert *verts, const int numVerts, const idPlane frustum[6], unsigned short *pointCull, float epsilon );

#endif /* MSVC x86/x64, or GNUC x86/x64 with __AVX2__ */
};

#endif /* !__MATH_SIMD_AVX2_H__ */
Loading

0 comments on commit d0fc68c

Please sign in to comment.