Skip to content

Commit

Permalink
SN-5899 imx mcu optimizations (#757)
Browse files Browse the repository at this point in the history
* Renamed ISMatrix.h inline macros to allcaps

* Create functions for mag_VecX() and mat3x3_IsIdentity()
  • Loading branch information
waltjohnson authored Nov 8, 2024
1 parent 970b0e3 commit 835cf84
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 56 deletions.
64 changes: 64 additions & 0 deletions src/ISMatrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,48 @@ void abs_Vec4d( ixVector4d result, const ixVector4d v )
result[3] = fabs(v[3]);
}

/**
 * Dot product of a 2-element vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0] and v[1] are read.
 * @return   v[0]*v[0] + v[1]*v[1]
 */
f_t dot_Vec2( const ixVector2 v)
{
    return (v[0] * v[0]) + (v[1] * v[1]);
}

/**
 * Dot product of a 3-element vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0]..v[2] are read.
 * @return   v[0]*v[0] + v[1]*v[1] + v[2]*v[2], summed left to right.
 */
f_t dot_Vec3( const ixVector3 v)
{
    return (v[0] * v[0]) + (v[1] * v[1]) + (v[2] * v[2]);
}

/**
 * Dot product of a 4-element vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0]..v[3] are read.
 * @return   v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + v[3]*v[3], summed left to right.
 */
f_t dot_Vec4( const ixVector4 v)
{
    return (v[0] * v[0]) + (v[1] * v[1]) + (v[2] * v[2]) + (v[3] * v[3]);
}

/**
 * Dot product of a 2-element double vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0] and v[1] are read.
 * @return   v[0]*v[0] + v[1]*v[1]
 */
double dot_Vec2d( const ixVector2d v)
{
    return (v[0] * v[0]) + (v[1] * v[1]);
}

/**
 * Dot product of a 3-element double vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0]..v[2] are read.
 * @return   v[0]*v[0] + v[1]*v[1] + v[2]*v[2], summed left to right.
 */
double dot_Vec3d( const ixVector3d v)
{
    return (v[0] * v[0]) + (v[1] * v[1]) + (v[2] * v[2]);
}

/**
 * Dot product of a 4-element double vector with itself (its squared magnitude).
 *
 * @param v  Input vector; elements v[0]..v[3] are read.
 * @return   v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + v[3]*v[3], summed left to right.
 */
double dot_Vec4d( const ixVector4d v)
{
    return (v[0] * v[0]) + (v[1] * v[1]) + (v[2] * v[2]) + (v[3] * v[3]);
}

f_t dot_Vec2_Vec2( const ixVector2 v1, const ixVector2 v2 )
{
return v1[0] * v2[0] +
Expand Down Expand Up @@ -674,6 +716,21 @@ double dot_Vec4d_Vec4d( const ixVector4d v1, const ixVector4d v2 )
v1[3] * v2[3];
}

/**
 * Magnitude of a 2-element vector.
 *
 * @param v  Input vector, forwarded unchanged to dot_Vec2().
 * @return   _SQRT applied to dot_Vec2(v).
 */
f_t mag_Vec2( const ixVector2 v)
{
    const f_t magSquared = dot_Vec2(v);

    return _SQRT(magSquared);
}

/**
 * Magnitude of a 3-element vector.
 *
 * @param v  Input vector, forwarded unchanged to dot_Vec3().
 * @return   _SQRT applied to dot_Vec3(v).
 */
f_t mag_Vec3( const ixVector3 v)
{
    const f_t magSquared = dot_Vec3(v);

    return _SQRT(magSquared);
}

/**
 * Magnitude of a 4-element vector.
 *
 * @param v  Input vector, forwarded unchanged to dot_Vec4().
 * @return   _SQRT applied to dot_Vec4(v).
 */
f_t mag_Vec4( const ixVector4 v)
{
    const f_t magSquared = dot_Vec4(v);

    return _SQRT(magSquared);
}

//_______________________________________________________________________________________________
//observe that cross product output cannot overwrite cross product input without destroying logic
void cross_Vec3( ixVector3 result, const ixVector3 v1, const ixVector3 v2 )
Expand Down Expand Up @@ -870,6 +927,13 @@ void mean_Vec3d_Vec3d( ixVector3d result, const ixVector3d v1, const ixVector3d
mul_Vec3d_X(result, result, 0.5);
}

/**
 * Test whether a 3x3 matrix, given as 9 consecutive elements, is exactly the identity.
 *
 * Elements are compared with == (no tolerance): m[0], m[4] and m[8] must equal 1.0f
 * and every other element must equal 0.0f. Elements are checked in index order and
 * the scan stops at the first mismatch.
 *
 * @param m  Matrix elements; m[0]..m[8] are read.
 * @return   1 if all nine elements match the identity pattern, 0 otherwise.
 */
int mat3x3_IsIdentity(const f_t m[])
{
    int idx;

    for( idx=0; idx<9; idx++ )
    {
        // Flat indices 0, 4 and 8 (every 4th element) hold the expected 1.0 entries
        const f_t expected = (idx % 4 == 0) ? 1.0f : 0.0f;

        if( m[idx] != expected )
        {
            return 0;
        }
    }

    return 1;
}

void cpy_MatRxC_MatMxN( f_t *result, i_t r, i_t c, i_t r_offset, i_t c_offset, f_t *A, i_t m, i_t n )
{
// Ensure source matrix A fits within result matrix
Expand Down
100 changes: 53 additions & 47 deletions src/ISMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,57 +25,42 @@ extern "C" {
//_____ M A C R O S ________________________________________________________

// Magnitude Squared or Dot Product of vector w/ itself
#if 0 // Inline functions
#define dot_Vec2(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1])
#define dot_Vec3(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1] + (v)[2]*(v)[2])
#define dot_Vec4(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1] + (v)[2]*(v)[2] + (v)[3]*(v)[3])
#define dot_Vec2d(v) dot_Vec2(v)
#define dot_Vec3d(v) dot_Vec3(v)
#define dot_Vec4d(v) dot_Vec4(v)
#else // Normal functions (less instruction space)
#define dot_Vec2(v) dot_Vec2_Vec2(v,v)
#define dot_Vec3(v) dot_Vec3_Vec3(v,v)
#define dot_Vec4(v) dot_Vec4_Vec4(v,v)
#define dot_Vec2d(v) dot_Vec2d_Vec2d(v,v)
#define dot_Vec3d(v) dot_Vec3d_Vec3d(v,v)
#define dot_Vec4d(v) dot_Vec4d_Vec4d(v,v)
#endif
// Inline macros (faster). Call the equivalent functions (e.g. dot_Vec3()) for slower execution but less instruction space.
// Each macro expands its argument v several times, so pass an expression without side effects.
#define DOT_VEC2(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1])
#define DOT_VEC3(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1] + (v)[2]*(v)[2])
#define DOT_VEC4(v) ((v)[0]*(v)[0] + (v)[1]*(v)[1] + (v)[2]*(v)[2] + (v)[3]*(v)[3])

// Magnitude or Norm
#define mag_Vec2(v) (_SQRT(dot_Vec2(v)))
#define mag_Vec3(v) (_SQRT(dot_Vec3(v)))
#define mag_Vec4(v) (_SQRT(dot_Vec4(v)))
#define mag_Vec2d(v) (sqrt(dot_Vec2d(v)))
#define mag_Vec3d(v) (sqrt(dot_Vec3d(v)))
#define mag_Vec4d(v) (sqrt(dot_Vec4d(v)))
// Inline magnitude macros: square root of the vector's dot product with itself.
// The non-D variants use _SQRT; the D variants call sqrt() on the same DOT_VECn() expression.
#define MAG_VEC2(v) (_SQRT(DOT_VEC2(v)))
#define MAG_VEC3(v) (_SQRT(DOT_VEC3(v)))
#define MAG_VEC4(v) (_SQRT(DOT_VEC4(v)))
#define MAG_VEC2D(v) (sqrt(DOT_VEC2(v)))
#define MAG_VEC3D(v) (sqrt(DOT_VEC3(v)))
#define MAG_VEC4D(v) (sqrt(DOT_VEC4(v)))

// Lower bounds applied to a divisor before division (used by the reciprocal-norm macros in this header).
#define EPSF32 (1.0e-16f) // Smallest number for safe division
// NOTE(review): the 'l' suffix makes this a long double constant, not a double - confirm this is intended.
#define EPSF64 (1.0e-16l) // Smallest number for safe division

#define recipNorm_Vec2(v) (1.0f/_MAX(mag_Vec2(v), EPSF32))
#define recipNorm_Vec3(v) (1.0f/_MAX(mag_Vec3(v), EPSF32))
#define recipNorm_Vec4(v) (1.0f/_MAX(mag_Vec4(v), EPSF32))
#define recipNorm_Vec3d(v) (1.0l/_MAX(mag_Vec3d(v), EPSF64))
#define recipNorm_Vec4d(v) (1.0l/_MAX(mag_Vec4d(v), EPSF64))

#define unwrap_Vec3(v) {UNWRAP_RAD_F32(v[0]); UNWRAP_RAD_F32(v[1]); UNWRAP_RAD_F32(v[2]) }
// Reciprocal of the vector magnitude. The magnitude is passed through _MAX() together with
// EPSF32 / EPSF64 before dividing; presumably _MAX yields the larger of the two so the divisor
// never drops below the epsilon - confirm against the _MAX definition.
// The D variants divide the long double literal 1.0l, so their result type is long double.
#define RECIPNORM_VEC2(v) (1.0f/_MAX(MAG_VEC2(v), EPSF32))
#define RECIPNORM_VEC3(v) (1.0f/_MAX(MAG_VEC3(v), EPSF32))
#define RECIPNORM_VEC4(v) (1.0f/_MAX(MAG_VEC4(v), EPSF32))
#define RECIPNORM_VEC3D(v) (1.0l/_MAX(MAG_VEC3D(v), EPSF64))
#define RECIPNORM_VEC4D(v) (1.0l/_MAX(MAG_VEC4D(v), EPSF64))

#define Vec3_OneLessThan_X(v,x) ( ((v[0])<(x)) || ((v[1])<(x)) || ((v[2])<(x)) )
#define Vec3_OneGrtrThan_X(v,x) ( ((v[0])>(x)) || ((v[1])>(x)) || ((v[2])>(x)) )
#define Vec3_AllLessThan_X(v,x) ( ((v[0])<(x)) && ((v[1])<(x)) && ((v[2])<(x)) )
#define Vec3_AllGrtrThan_X(v,x) ( ((v[0])>(x)) && ((v[1])>(x)) && ((v[2])>(x)) )
#define Vec3_IsZero(v) ( ((v[0])==(0.0f)) && ((v[1])==(0.0f)) && ((v[2])==(0.0f)) )
#define Vec3_IsAnyZero(v) ( ((v[0])==(0.0f)) || ((v[1])==(0.0f)) || ((v[2])==(0.0f)) )
#define Vec3_IsAnyNonZero(v) ( ((v[0])!=(0.0f)) || ((v[1])!=(0.0f)) || ((v[2])!=(0.0f)) )
#define UNWRAP_VEC3(v) {UNWRAP_RAD_F32(v[0]); UNWRAP_RAD_F32(v[1]); UNWRAP_RAD_F32(v[2]) }

#define Mat3x3_IsIdentity(m) ( (m[0]==1.0f) && (m[1]==0.0f) && (m[2]==0.0f) && \
(m[3]==0.0f) && (m[4]==1.0f) && (m[5]==0.0f) && \
(m[6]==0.0f) && (m[7]==0.0f) && (m[8]==1.0f) )
// Element-wise tests of a 3-element vector v against a scalar x (or against 0.0f).
//   ONE*   : true if at least one element satisfies the comparison
//   ALL*   : true if every element satisfies the comparison
//   ISZERO / ISANYZERO / ISANYNONZERO : exact == / != comparison with 0.0f
// The vector argument is parenthesized before subscripting so that pointer expressions
// such as (p + 1) index correctly. Arguments are expanded more than once, so pass
// expressions without side effects.
#define VEC3_ONELESSTHAN_X(v,x) ( (((v)[0])<(x)) || (((v)[1])<(x)) || (((v)[2])<(x)) )
#define VEC3_ONEGRTRTHAN_X(v,x) ( (((v)[0])>(x)) || (((v)[1])>(x)) || (((v)[2])>(x)) )
#define VEC3_ALLLESSTHAN_X(v,x) ( (((v)[0])<(x)) && (((v)[1])<(x)) && (((v)[2])<(x)) )
#define VEC3_ALLGRTRTHAN_X(v,x) ( (((v)[0])>(x)) && (((v)[1])>(x)) && (((v)[2])>(x)) )
#define VEC3_ISZERO(v) ( (((v)[0])==(0.0f)) && (((v)[1])==(0.0f)) && (((v)[2])==(0.0f)) )
#define VEC3_ISANYZERO(v) ( (((v)[0])==(0.0f)) || (((v)[1])==(0.0f)) || (((v)[2])==(0.0f)) )
#define VEC3_ISANYNONZERO(v) ( (((v)[0])!=(0.0f)) || (((v)[1])!=(0.0f)) || (((v)[2])!=(0.0f)) )

#define set_Vec3_X(v,x) { (v[0])=(x); (v[1])=(x); (v[2])=(x); }
#define set_Vec4_X(v,x) { (v[0])=(x); (v[1])=(x); (v[2])=(x); (v[3])=(x); }
// Assign the scalar x to every element of a 3- or 4-element vector v.
// The vector argument is parenthesized before subscripting so that pointer expressions
// such as (p + 1) index correctly. x is expanded once per element, so pass an
// expression without side effects. The brace-block form is kept for existing callers.
#define SET_VEC3_X(v,x) { ((v)[0])=(x); ((v)[1])=(x); ((v)[2])=(x); }
#define SET_VEC4_X(v,x) { ((v)[0])=(x); ((v)[1])=(x); ((v)[2])=(x); ((v)[3])=(x); }

#define is_NaN(v) ((v) != (v))
// True when v compares unequal to itself, which is how a NaN behaves under IEEE-754 comparison.
// v is expanded twice, so pass an expression without side effects.
#define IS_NAN(v) ((v) != (v))

// Zero order low-pass filter
typedef struct
Expand Down Expand Up @@ -262,6 +247,16 @@ void abs_Vec2d( ixVector2d result, const ixVector2d v );
void abs_Vec3d( ixVector3d result, const ixVector3d v );
void abs_Vec4d( ixVector4d result, const ixVector4d v );

/* Dot product of a vector with itself (squared magnitude)
 * result = v(n) dot v(n)
 * Function counterparts of the DOT_VECn() inline macros.
 */
f_t dot_Vec2(const ixVector2 v);
f_t dot_Vec3(const ixVector3 v);
f_t dot_Vec4(const ixVector4 v);
double dot_Vec2d(const ixVector2d v);
double dot_Vec3d(const ixVector3d v);
double dot_Vec4d(const ixVector4d v);

/* Dot product
* result = v1(n) dot v2(n)
*/
Expand All @@ -272,6 +267,13 @@ double dot_Vec2d_Vec2d(const ixVector2d v1, const ixVector2d v2 );
double dot_Vec3d_Vec3d(const ixVector3d v1, const ixVector3d v2 );
double dot_Vec4d_Vec4d(const ixVector4d v1, const ixVector4d v2 );

/* Vector magnitude
 * result = sqrt( v(n) dot v(n) )
 * Function counterparts of the MAG_VECn() inline macros.
 */
f_t mag_Vec2( const ixVector2 v);
f_t mag_Vec3( const ixVector3 v);
f_t mag_Vec4( const ixVector4 v);

/* Cross product
* result(3) = v1(3) x v2(3)
*/
Expand Down Expand Up @@ -510,6 +512,10 @@ static __inline void zero_MatMxN( f_t *M, i_t m, i_t n )
}
}

/**
 * Return 1 if 3x3 matrix is an identity, 0 if not.
 * m is read as 9 consecutive elements (m[0]..m[8]); elements 0, 4 and 8 are compared
 * against 1.0f and the rest against 0.0f using exact equality (no tolerance).
 */
int mat3x3_IsIdentity(const f_t m[]);

/* Copy vector
* result(3) = v(3)
Expand Down Expand Up @@ -637,7 +643,7 @@ char inv_Mat4( ixMatrix4 result, const ixMatrix4 m );
static __inline void normalize_Vec2( ixVector2 v )
{
// Normalize vector
mul_Vec2_X( v, v, recipNorm_Vec2(v) );
mul_Vec2_X( v, v, RECIPNORM_VEC2(v) );
}

/*
Expand All @@ -646,7 +652,7 @@ static __inline void normalize_Vec2( ixVector2 v )
static __inline void normalize_Vec3( ixVector3 result, const ixVector3 v )
{
// Normalize vector
mul_Vec3_X( result, v, recipNorm_Vec3(v) );
mul_Vec3_X( result, v, RECIPNORM_VEC3(v) );
}

/*
Expand All @@ -655,12 +661,12 @@ static __inline void normalize_Vec3( ixVector3 result, const ixVector3 v )
static __inline void normalize_Vec4( ixVector4 result, const ixVector4 v )
{
// Normalize vector
mul_Vec4_X( result, v, recipNorm_Vec4(v) );
mul_Vec4_X( result, v, RECIPNORM_VEC4(v) );
}
static __inline void normalize_Vec4d( ixVector4d result, const ixVector4d v )
{
// Normalize vector
mul_Vec4d_X( result, v, recipNorm_Vec4d(v) );
mul_Vec4d_X( result, v, RECIPNORM_VEC4D(v) );
}

/*
Expand Down Expand Up @@ -726,7 +732,7 @@ static __inline int isNan_array( f_t *a, int size )

for( i=0; i<size; i++ )
{
if( is_NaN(a[i]) )
if( IS_NAN(a[i]) )
return 1;
}

Expand All @@ -743,7 +749,7 @@ static __inline int isNan_array_d( double *a, int size )

for( i=0; i<size; i++ )
{
if( is_NaN(a[i]) )
if( IS_NAN(a[i]) )
return 1;
}

Expand Down
14 changes: 7 additions & 7 deletions src/ISPose.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,17 @@ void quat_Vec3_Vec3(ixQuat result, const ixVector3 v1, const ixVector3 v2)
ixVector3 w1, w2;

// Normalize input vectors
mul_Vec3_X( w1, v1, recipNorm_Vec3(v1) );
mul_Vec3_X( w2, v2, recipNorm_Vec3(v2) );
mul_Vec3_X( w1, v1, RECIPNORM_VEC3(v1) );
mul_Vec3_X( w2, v2, RECIPNORM_VEC3(v2) );

// q[1:3]
cross_Vec3( &result[1], w1, w2 );

// q[0]
result[0] = (f_t)(_SQRT( dot_Vec3(w1) * dot_Vec3(w1) ) + dot_Vec3_Vec3(w1, w2));
result[0] = (f_t)(_SQRT( DOT_VEC3(w1) * DOT_VEC3(w1) ) + dot_Vec3_Vec3(w1, w2));

// Normalize quaternion
div_Vec4_X( result, result, mag_Vec4(result) );
div_Vec4_X( result, result, MAG_VEC4(result) );
}


Expand Down Expand Up @@ -551,7 +551,7 @@ void quatW(const ixEuler euler, ixMatrix4 mat)
void quatRotAxis(const ixQuat q, ixVector3 pqr)
{
// Normalize quaternion
// mul_Vec4_X( q, q, 1/mag_Vec4(q) );
// mul_Vec4_X( q, q, 1/MAG_VEC4(q) );

// f_t theta = _ACOS( q[0] ) * (f_t)2.0;
f_t sin_a, d;
Expand Down Expand Up @@ -689,7 +689,7 @@ float vectorToRoll(const ixVector3 v)
*/
float vectorToPitch(const ixVector3 v)
{
float mag = mag_Vec3(v);
float mag = MAG_VEC3(v);
if(mag == 0.0f)
{
return 0.0f;
Expand All @@ -703,7 +703,7 @@ float vectorToPitch(const ixVector3 v)
*/
float vectorSelectedAxisToPitch(const ixVector3 v, int pitchAxis)
{
float mag = mag_Vec3(v);
float mag = MAG_VEC3(v);
if (mag == 0.0f)
{
return 0.0f;
Expand Down
2 changes: 1 addition & 1 deletion src/filters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ void integrateDeltaThetaVelBortz(ixVector3 theta, ixVector3 vel, imus_t *imu, im
cross_Vec3(thxthxwb, theta, thxwb);
cross_Vec3(thxab, theta, ab);
cross_Vec3(thxthxab, theta, thxab);
mag_theta2 = dot_Vec3(theta);
mag_theta2 = DOT_VEC3(theta);
mag_theta4 = mag_theta2 * mag_theta2;
Kw = Kw0 + mag_theta2 * Kw1 + mag_theta4 * Kw2; // + mag_theta4 * mag_theta2 * Kw3; <--- the last term is negligibly small
for (int i = 0; i < 3; i++) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ bool testVectors()
VEC3_CLOSE(IS_v1_normalized, eig_v1_normalized);

// Norm
f_t IS_v1_norm = mag_Vec3(IS_v1);
f_t IS_v1_norm = MAG_VEC3(IS_v1);
double eig_v1_norm = eig_v1.norm();
REQUIRE_SUPER_CLOSE(IS_v1_norm, eig_v1_norm);
#endif
Expand Down

0 comments on commit 835cf84

Please sign in to comment.