ipc4: mixin: Restore back gain support #9038

Closed
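The diff below re-applies a fixed-point gain in the IPC4 mixin/mixout processing paths for all three sample formats (s16, s24, s32), in both the generic C and the HiFi3 implementations. In each variant the source sample is scaled by a gain word that is assumed to carry IPC4_MIXIN_GAIN_SHIFT fractional bits: samples that overlap already-mixed sink data are added with saturation, while the remaining samples, previously copied verbatim with memcpy_s() in the generic code, are now written as gain-scaled copies. A minimal scalar sketch of both cases for the 16-bit format, using hypothetical helper names rather than the SOF macros (q_mults_16x16(), sat_int16()):

#include <stdint.h>

/* Illustrative sketch only. GAIN_SHIFT stands in for IPC4_MIXIN_GAIN_SHIFT
 * (the number of fractional bits in the gain word); 10 is an assumption.
 */
#define GAIN_SHIFT 10

static int16_t sat16(int32_t x)
{
	if (x > INT16_MAX)
		return INT16_MAX;
	if (x < INT16_MIN)
		return INT16_MIN;
	return (int16_t)x;
}

/* Sink already holds mixed data: accumulate the gain-scaled source sample. */
static int16_t mix_one_s16(int16_t sink, int16_t src, uint16_t gain)
{
	return sat16((int32_t)sink + (((int32_t)src * gain) >> GAIN_SHIFT));
}

/* Sink holds nothing yet: the former plain copy becomes a gain-scaled copy. */
static int16_t copy_one_s16(int16_t src, uint16_t gain)
{
	return (int16_t)(((int32_t)src * gain) >> GAIN_SHIFT);
}

If the gain word encodes unity (1 << GAIN_SHIFT), the scaled copy degenerates to the old plain copy.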
38 changes: 26 additions & 12 deletions src/audio/mixin_mixout/mixin_mixout_generic.c
@@ -37,7 +37,9 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
nmax = (int16_t *)sink->buf_end - dst;
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
*dst = sat_int16(*dst + *src++);
*dst = sat_int16((int32_t)*dst +
q_mults_16x16(*src, gain, IPC4_MIXIN_GAIN_SHIFT));
src++;
dst++;
}
}
@@ -49,9 +51,12 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
n = MIN(left_samples, nmax);
nmax = (int16_t *)sink->buf_end - dst;
n = MIN(n, nmax);
memcpy_s(dst, n * sizeof(int16_t), src, n * sizeof(int16_t));
dst += n;
src += n;

for (i = 0; i < n; i++) {
*dst = q_mults_16x16(*src, gain, IPC4_MIXIN_GAIN_SHIFT);
src++;
dst++;
}
}
}
#endif /* CONFIG_FORMAT_S16LE */
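In both s16 loops above the product is formed in 32 bits and only the final value is narrowed: the mix loop saturates with sat_int16() after adding to the existing sink sample, and the remainder loop, which replaced the memcpy_s(), stores the scaled sample directly. A model of what the fixed-point helpers are assumed to compute (the actual q_mults_16x16()/q_mults_32x32() macros live in SOF's fixed-point headers and may differ in detail):

#include <stdint.h>

/* Assumed contract: widen, multiply by the Q-format gain word, then shift out
 * the `shift` fractional bits; no saturation inside the helper itself.
 */
static inline int32_t q_mults_16x16_model(int16_t x, int16_t gain, int shift)
{
	return ((int32_t)x * (int32_t)gain) >> shift;
}

static inline int64_t q_mults_32x32_model(int32_t x, int32_t gain, int shift)
{
	return ((int64_t)x * (int64_t)gain) >> shift;
}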
@@ -81,7 +86,10 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
nmax = (int32_t *)sink->buf_end - dst;
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
*dst = sat_int24(sign_extend_s24(*dst) + sign_extend_s24(*src++));
*dst = sat_int24(sign_extend_s24(*dst) +
(int32_t)q_mults_32x32(sign_extend_s24(*src),
gain, IPC4_MIXIN_GAIN_SHIFT));
src++;
dst++;
}
}
@@ -93,9 +101,11 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
n = MIN(left_samples, nmax);
nmax = (int32_t *)sink->buf_end - dst;
n = MIN(n, nmax);
memcpy_s(dst, n * sizeof(int32_t), src, n * sizeof(int32_t));
dst += n;
src += n;
for (i = 0; i < n; i++) {
*dst = q_mults_32x32(sign_extend_s24(*src), gain, IPC4_MIXIN_GAIN_SHIFT);
src++;
dst++;
}
}
}
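The s24 variant keeps each sample in the low 24 bits of a 32-bit container, so both operands are sign-extended with sign_extend_s24() before the arithmetic, the gain product goes through q_mults_32x32() (a 24-bit value no longer fits a 16 x 16 multiply), and the mixed sum is clamped back to the 24-bit range with sat_int24(). A scalar model of one mixed sample, with illustrative stand-ins for the SOF helpers:

#include <stdint.h>

#define GAIN_SHIFT 10	/* stands in for IPC4_MIXIN_GAIN_SHIFT (assumption) */

/* Sign-extend a 24-bit value stored in the low bits of an int32_t. */
static inline int32_t sign_extend_s24_model(int32_t x)
{
	return (int32_t)((uint32_t)x << 8) >> 8;
}

/* Clamp to the signed 24-bit range. */
static inline int32_t sat_int24_model(int32_t x)
{
	if (x > 0x7fffff)
		return 0x7fffff;
	if (x < -0x800000)
		return -0x800000;
	return x;
}

static int32_t mix_one_s24(int32_t sink, int32_t src, uint16_t gain)
{
	int32_t scaled = (int32_t)(((int64_t)sign_extend_s24_model(src) * gain)
				   >> GAIN_SHIFT);

	return sat_int24_model(sign_extend_s24_model(sink) + scaled);
}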

@@ -125,7 +135,9 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
nmax = (int32_t *)sink->buf_end - dst;
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
*dst = sat_int32((int64_t)*dst + (int64_t)*src++);
*dst = sat_int32((int64_t)*dst +
q_mults_32x32(*src, gain, IPC4_MIXIN_GAIN_SHIFT));
src++;
dst++;
}
}
@@ -137,9 +149,11 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
n = MIN(left_samples, nmax);
nmax = (int32_t *)sink->buf_end - dst;
n = MIN(n, nmax);
memcpy_s(dst, n * sizeof(int32_t), src, n * sizeof(int32_t));
dst += n;
src += n;
for (i = 0; i < n; i++) {
*dst = q_mults_32x32(*src, gain, IPC4_MIXIN_GAIN_SHIFT);
src++;
dst++;
}
}
}
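For s32 the scaled sample needs a 64-bit intermediate, so the generic code multiplies via q_mults_32x32(), widens the sum to 64 bits and saturates it back with sat_int32(); as in the other formats, the remainder loop now writes gain-scaled samples instead of the former memcpy_s(). A scalar model of the mix case:

#include <stdint.h>

#define GAIN_SHIFT 10	/* stands in for IPC4_MIXIN_GAIN_SHIFT (assumption) */

static inline int32_t sat_int32_model(int64_t x)
{
	if (x > INT32_MAX)
		return INT32_MAX;
	if (x < INT32_MIN)
		return INT32_MIN;
	return (int32_t)x;
}

/* Both the gain product and the sum use a 64-bit intermediate. */
static int32_t mix_one_s32(int32_t sink, int32_t src, uint16_t gain)
{
	int64_t scaled = ((int64_t)src * gain) >> GAIN_SHIFT;

	return sat_int32_model((int64_t)sink + scaled);
}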

95 changes: 94 additions & 1 deletion src/audio/mixin_mixout/mixin_mixout_hifi3.c
@@ -27,6 +27,10 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* cir_buf_wrap() is required and is done below in a loop */
ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample;
ae_int16 *src = source->ptr;
ae_int16x4 gain_vec;
ae_int32x2 tmpl, tmph;

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
@@ -50,6 +54,13 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* process 4 frames per loop */
for (i = 0; i < m; i++) {
AE_LA16X4_IP(in_sample, inu, in);

/* apply gain to in_sample */
AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmph, tmpl);

AE_LA16X4_IP(out_sample, outu1, out);
out--;
out_sample = AE_ADD16S(in_sample, out_sample);
@@ -62,6 +73,11 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
*/
for (i = 0; i < left ; i++) {
AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmpl, tmpl);

AE_L16_IP(out_sample, (ae_int16 *)out, 0);
out_sample = AE_ADD16S(in_sample, out_sample);
AE_S16_0_IP(out_sample, (ae_int16 *)out, sizeof(ae_int16));
@@ -84,6 +100,12 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* process 4 frames per loop */
for (i = 0; i < m; i++) {
AE_LA16X4_IP(in_sample, inu, in);

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmph, tmpl);

AE_SA16X4_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);
@@ -93,6 +115,11 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
*/
for (i = 0; i < left ; i++) {
AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmpl, tmpl);

AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16));
}
}
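The HiFi3 s16 loops work on four samples per iteration: AE_MUL16X4 forms the four 16 x 16 products in two 32x2 accumulators, AE_SRAI32 drops the gain's fractional bits, and the lanes are packed back to 16 bits (AE_CVT16X4) before the saturating AE_ADD16S in the mix path or the plain store in the copy path. A plain-C, per-lane model of the intended arithmetic; this is a sketch, not a statement of the exact intrinsic semantics:

#include <stdint.h>

#define GAIN_SHIFT 10	/* stands in for IPC4_MIXIN_GAIN_SHIFT (assumption) */

/* Scale four 16-bit lanes in place; the add/saturate (or store) happens
 * afterwards, as in the vector code above.
 */
static void scale_lanes_s16(int16_t lane[4], uint16_t gain)
{
	int i;

	for (i = 0; i < 4; i++) {
		int32_t p = ((int32_t)lane[i] * gain) >> GAIN_SHIFT;

		lane[i] = (int16_t)p;	/* packed back into a 16-bit lane */
	}
}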
@@ -106,7 +133,7 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
{
int samples_to_mix, samples_to_copy, left_samples;
int n, nmax, i, m, left;
ae_int32x2 in_sample;
ae_int32x2 in_sample, in_sample32;
ae_int32x2 out_sample;
ae_int32x2 *in;
ae_int32x2 *out;
@@ -116,6 +143,10 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
int32_t *src = source->ptr;
ae_int16x4 gain_vec;
ae_int64 tmph, tmpl;

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
@@ -139,6 +170,16 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* process 2 samples per time */
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

/* apply gain to in_sample */
in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmph = AE_MUL32X16_H0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

AE_LA32X2_IP(out_sample, outu1, out);
out--;
out_sample = AE_ADD24S(in_sample, out_sample);
@@ -149,6 +190,12 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

AE_L32_IP(out_sample, (ae_int32 *)out, 0);
out_sample = AE_ADD24S(in_sample, out_sample);
AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
@@ -169,12 +216,27 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmph = AE_MUL32X16_H0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

AE_SA32X2_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);
/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
}
}
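In the HiFi3 s24 loops each sample is shifted left by 8 (AE_SLAI32) so the 24-bit sign bit lands in bit 31, multiplied by the 16-bit gain into a 64-bit accumulator (AE_MUL32X16), then shifted right by 8 + IPC4_MIXIN_GAIN_SHIFT, which undoes the temporary 8-bit offset and drops the gain's fractional bits in a single shift. A per-lane model of that arithmetic (illustrative only):

#include <stdint.h>

#define GAIN_SHIFT 10	/* stands in for IPC4_MIXIN_GAIN_SHIFT (assumption) */

static int32_t scale_one_s24(int32_t src, uint16_t gain)
{
	/* Move the 24-bit sign bit into bit 31, as AE_SLAI32(..., 8) does. */
	int32_t widened = (int32_t)((uint32_t)src << 8);
	int64_t product = (int64_t)widened * gain;

	/* One shift removes both the 8-bit offset and the fractional bits. */
	return (int32_t)(product >> (8 + GAIN_SHIFT));
}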
@@ -199,6 +261,10 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
int32_t *src = source->ptr;
ae_int16x4 gain_vec;
ae_int64 tmpl, tmph;

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
@@ -221,6 +287,15 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

/* apply gain to in_sample */
tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmph = AE_MUL32X16_H0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

AE_LA32X2_IP(out_sample, outu1, out);
out--;
out_sample = AE_ADD32S(in_sample, out_sample);
@@ -231,6 +306,11 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

AE_L32_IP(out_sample, (ae_int32 *)out, 0);
out_sample = AE_ADD32S(in_sample, out_sample);
AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
@@ -252,13 +332,26 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmph = AE_MUL32X16_H0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

AE_SA32X2_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);

/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
}
}
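The HiFi3 s32 loops use the same pattern without the 8-bit offset: a 32 x 16 multiply into a 64-bit accumulator (AE_MUL32X16) followed by a right shift of IPC4_MIXIN_GAIN_SHIFT, after which the mix variant adds with 32-bit saturation (AE_ADD32S). A per-lane model, assuming the gain word fits in 16 bits and carries IPC4_MIXIN_GAIN_SHIFT fractional bits:

#include <stdint.h>

#define GAIN_SHIFT 10	/* stands in for IPC4_MIXIN_GAIN_SHIFT (assumption) */

/* One multiply per lane; the same 16-bit gain word is reused for every lane,
 * which is why the vector code loads it once into gain_vec.
 */
static int32_t scale_one_s32(int32_t src, uint16_t gain)
{
	return (int32_t)(((int64_t)src * gain) >> GAIN_SHIFT);
}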