Math: Optimise 16-bit matrix multiplication functions.
Improve mat_multiply and mat_multiply_elementwise for 16-bit signed
integers by refactoring the arithmetic and simplifying the handling of
Q0 data. Both functions now increment pointers within the loop
expressions for better legibility and potential compiler optimisation.

Changes:
- Improve the x pointer increments in mat_multiply.
- Integrate the Q0 conditional logic into the main flow.
- Clean up the mat_multiply_elementwise loop structure.
- Ensure precision with appropriate fractional bit shifts.

These modifications aim to improve accuracy and efficiency in
fixed-point arithmetic operations.
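
As a minimal standalone sketch of the shared rounding scheme (the Q1.15
format, the sample values and the q15_mul helper are illustrative
assumptions, not part of this change):

#include <stdint.h>
#include <stdio.h>

/* Multiply two Q1.15 samples and round the product back to Q1.15.
 * shift = 15 + 15 - 15 = 15, offset = 1 << 14 rounds to nearest.
 */
static int16_t q15_mul(int16_t a, int16_t b)
{
	const int shift = 15;
	const int64_t offset = 1LL << (shift - 1);
	int64_t acc = (int64_t)a * b;

	return (int16_t)((acc + offset) >> shift);
}

int main(void)
{
	printf("%d\n", q15_mul(16384, 8192)); /* 0.5 * 0.25 = 0.125 -> 4096 */
	return 0;
}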

Signed-off-by: Shriram Shastry <malladi.sastry@intel.com>
ShriramShastry committed May 2, 2024
1 parent 07b762e commit 5493610
Showing 1 changed file with 59 additions and 54 deletions.
113 changes: 59 additions & 54 deletions src/math/matrix.c
@@ -3,93 +3,98 @@
// Copyright(c) 2022 Intel Corporation. All rights reserved.
//
// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
// Shriram Shastry <malladi.sastry@linux.intel.com>

#include <sof/math/matrix.h>
#include <errno.h>
#include <stdint.h>

int mat_multiply(struct mat_matrix_16b *a, struct mat_matrix_16b *b, struct mat_matrix_16b *c)
{
	int64_t s;
	int16_t *x;
	int16_t *y;
	int16_t *z = c->data;
	int i, j, k;
	int y_inc = b->columns;
	const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1;
	/* check for NULL pointers */
	if (!a || !b || !c)
		return -EINVAL;

	/* check for dimensions compatibility */
	if (a->columns != b->rows || a->rows != c->rows || b->columns != c->columns)
		return -EINVAL;

	/* If all data is Q0 */
	if (shift_minus_one == -1) {
		for (i = 0; i < a->rows; i++) {
			for (j = 0; j < b->columns; j++) {
				s = 0;
				x = a->data + a->columns * i;
				y = b->data + j;
				for (k = 0; k < b->rows; k++) {
					s += (int32_t)(*x) * (*y);
					x++;
					y += y_inc;
				}
				*z = (int16_t)s; /* For Q16.0 */
				z++;
			}
		}
	int64_t acc;
	int16_t *x, *y, *z = c->data;
	int i, j, k;
	/* Increment for pointer y to jump to the next row in matrix B */
	int y_inc = b->columns;
	const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1;
	/* Check for integer overflows during the shift operation. */
	if (shift_minus_one < -1 || shift_minus_one > 62)
		return -ERANGE;

		return 0;
	}
	const int shift = shift_minus_one + 1;

	/* Calculate the offset for rounding. The offset is only needed when shift is
	 * positive (shift > 0). Since shift = shift_minus_one + 1, this corresponds to
	 * shift_minus_one >= 0, which also avoids an undefined shift by a negative count.
	 */
	const int64_t offset = (shift_minus_one >= 0) ? (1LL << shift_minus_one) : 0;

	for (i = 0; i < a->rows; i++) {
		for (j = 0; j < b->columns; j++) {
			s = 0;
			acc = 0;
			/* Position pointer x at the beginning of the ith row in matrix A */
			x = a->data + a->columns * i;
			/* Position pointer y at the beginning of the jth column in matrix B */
			y = b->data + j;
			for (k = 0; k < b->rows; k++) {
				s += (int32_t)(*x) * (*y);
				x++;
				/* Multiply elements and add to sum */
				acc += (int64_t)(*x++) * (*y);
				/* Move pointer y to the next element in the column */
				y += y_inc;
			}
			*z = (int16_t)(((s >> shift_minus_one) + 1) >> 1); /* Shift to Qx.y */
			z++;
			/* If shift == 0, then shift_minus_one == -1, which means the data is Q16.0.
			 * Otherwise, add the offset before the shift to round up if necessary.
			 */
			*z++ = (shift == 0) ? (int16_t)acc : (int16_t)((acc + offset) >> shift);
		}
	}

	return 0;
}
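
The rounding path above can be exercised outside the matrix code with a
small standalone sketch of the same accumulate-then-round pattern (the
1x3 row, 3x1 column and Q1.15 format are illustrative assumptions, not
data from this commit):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* One row of A (Q1.15) times one column of B (Q1.15), result in Q1.15 */
	const int16_t row[3] = { 16384, -8192, 4096 };	/* 0.5, -0.25, 0.125 */
	const int16_t col[3] = { 8192, 16384, -16384 };	/* 0.25, 0.5, -0.5 */
	const int shift = 15 + 15 - 15;	/* a->fractions + b->fractions - c->fractions */
	const int64_t offset = 1LL << (shift - 1);
	const int16_t *x = row;
	const int16_t *y = col;
	int64_t acc = 0;
	int k;

	for (k = 0; k < 3; k++)
		acc += (int64_t)(*x++) * (*y++);	/* same pattern as the inner loop above */

	/* 0.5 * 0.25 - 0.25 * 0.5 + 0.125 * -0.5 = -0.0625 -> -2048 in Q1.15 */
	printf("%d\n", (int16_t)((acc + offset) >> shift));
	return 0;
}

Here the column vector is contiguous, so y advances by one element; in
mat_multiply it advances by y_inc to step down a column of matrix B.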

int mat_multiply_elementwise(struct mat_matrix_16b *a, struct mat_matrix_16b *b,
struct mat_matrix_16b *c)
{ int64_t p;
{
	/* check for NULL pointers */
	if (!a || !b || !c)
		return -EINVAL;
	/* Validate dimensions match for elementwise multiplication */
	if (a->columns != b->columns || a->rows != b->rows)
		return -EINVAL;

	const int total_elements = a->rows * a->columns;
	/* Calculate shift for result Q format */
	const int shift = a->fractions + b->fractions - c->fractions;
	/* Check for integer overflows during the shift operation. */
	if (shift < 0 || shift > 62)
		return -ERANGE;
	/* Offset for rounding, only needed when a right shift is applied */
	const int64_t offset = (shift > 0) ? (1LL << (shift - 1)) : 0;

	int16_t *x = a->data;
	int16_t *y = b->data;
	int16_t *z = c->data;
	int64_t acc;
	int i;
	const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1;

	if (a->columns != b->columns || b->columns != c->columns ||
	    a->rows != b->rows || b->rows != c->rows) {
		return -EINVAL;
	}

	/* If all data is Q0 */
	if (shift_minus_one == -1) {
		for (i = 0; i < a->rows * a->columns; i++) {
			*z = *x * *y;
			x++;
			y++;
			z++;
	/* When no shifting is required (shift == 0, e.g. all data is Q0), simply multiply */
	if (shift == 0) {
		for (i = 0; i < total_elements; i++)
			z[i] = x[i] * y[i]; /* Direct elementwise multiplication */
	} else {
		/* General case with shifting and offset for rounding */
		for (i = 0; i < total_elements; i++) {
			acc = (int64_t)x[i] * (int64_t)y[i]; /* Cast to int64_t to avoid overflow */
			z[i] = (int16_t)((acc + offset) >> shift); /* Apply offset and rounding shift */
		}

		return 0;
	}

	for (i = 0; i < a->rows * a->columns; i++) {
		p = (int32_t)(*x) * *y;
		*z = (int16_t)(((p >> shift_minus_one) + 1) >> 1); /* Shift to Qx.y */
		x++;
		y++;
		z++;
	}

	return 0;
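
For the elementwise path the same bookkeeping applies per element. As an
illustrative example with assumed formats (not taken from this commit):
multiplying the Q2.14 values 1.5 (24576) and -0.5 (-8192) and storing the
result as Q4.12 gives shift = 14 + 14 - 12 = 16 and offset = 1 << 15, so
the stored value is (-201326592 + 32768) >> 16 = -3072, i.e. -0.75 in
Q4.12; with all-Q0 integer data the shift is 0 and the raw product is
stored directly.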