Skip to content

Commit

Permalink
Audio: Dcblock: Add HiFi3 implementation of dcblock
Browse files Browse the repository at this point in the history
Add HiFi3 implementation of dcblock processing functions.
Compared with generic C version, the 16 bit format can save
about 48.1% cycles, and 48.4% for 24 bit format and 52.6%
for 32 bit.

Signed-off-by: Andrula Song <andrula.song@intel.com>
  • Loading branch information
andrula-song authored and lgirdwood committed Mar 21, 2023
1 parent 9aa2c13 commit abd81a1
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/audio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ set(src_sources src/src.c src/src_generic.c)
set(asrc_sources asrc/asrc.c asrc/asrc_farrow.c asrc/asrc_farrow_generic.c)
set(eq-fir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_fir/eq_fir.c eq_fir/eq_fir_generic.c)
set(eq-iir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_iir/eq_iir.c)
set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi4.c)
set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi3.c dcblock/dcblock_hifi4.c)
set(crossover_sources crossover/crossover.c crossover/crossover_generic.c)
set(tdfb_sources tdfb/tdfb.c tdfb/tdfb_generic.c tdfb/tdfb_direction.c)
set(drc_sources drc/drc.c drc/drc_generic.c drc/drc_math_generic.c)
Expand Down
1 change: 1 addition & 0 deletions src/audio/dcblock/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Sources of the dcblock component: the common part plus the generic C,
# HiFi3 and HiFi4 processing variants. All variant files are listed
# unconditionally; dcblock_hifi3.c wraps its body in #ifdef DCBLOCK_HIFI3
# (presumably the other variants are guarded the same way - TODO confirm).
add_local_sources(sof dcblock.c)
add_local_sources(sof dcblock_generic.c)
add_local_sources(sof dcblock_hifi3.c)
add_local_sources(sof dcblock_hifi4.c)
193 changes: 193 additions & 0 deletions src/audio/dcblock/dcblock_hifi3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <andrula.song@intel.com>

#include <stdint.h>
#include <sof/audio/component.h>
#include <sof/audio/format.h>
#include <sof/audio/dcblock/dcblock.h>

#ifdef DCBLOCK_HIFI3

#include <xtensa/tie/xt_hifi3.h>
LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL);

/*
 * Compute one output of the DC blocking filter:
 *
 *   y[n] = x[n] - x[n-1] + R * y[n-1]
 *
 * R       - filter coefficient, Q2.30
 * state_x - previous input sample x[n-1]
 * state_y - previous output sample y[n-1], Q1.31
 * sample  - current input sample x[n]
 *
 * Only the "L" element of each 32x2 argument takes part in the calculation
 * (the _LL multiply and the AE_MOVAD32_L extractions). Returns the new
 * output sample rounded from the 64 bit accumulator to 32 bits.
 */
static inline ae_int32x2 dcblock_cal(ae_int32x2 R, ae_int32x2 state_x, ae_int32x2 state_y,
				     ae_int32x2 sample)
{
	/* Feedback term R * y_prev: Q2.30 times Q1.31, the product is Q2.62 */
	ae_int64 feedback = AE_MULF32S_LL(R, state_y);
	/* Input difference x - x_prev held in a 64 bit accumulator */
	ae_int64 acc = AE_SUB64(AE_MOVAD32_L(sample), AE_MOVAD32_L(state_x));

	/* Align the difference with the Q2.62 feedback term, then sum both */
	acc = AE_SLAI64S(acc, 31);
	acc = AE_ADD64S(acc, feedback);

	/* Scale the sum to Q1.63 and round it to the 32 bit result */
	acc = AE_SLAI64S(acc, 1);
	return AE_ROUND32F64SSYM(acc);
}

/*
 * Describe the source stream buffer as circular buffer 0.
 *
 * The begin/end registers of circular buffer 0 are loaded with the source
 * buffer limits (addr .. end_addr). The processing functions read their
 * input with the circular (_XC) load intrinsics after calling this.
 */
static inline void dcblock_set_circular(const struct audio_stream __sparse_cache *source)
{
	/* The two boundary registers are independent of each other */
	AE_SETCEND0(source->end_addr);
	AE_SETCBEGIN0(source->addr);
}

#if CONFIG_FORMAT_S16LE
/*
 * DC blocking filter for 16 bit samples.
 *
 * dev    - component device, its private data holds the per channel filter
 *          state (x_prev, y_prev) and the R coefficients
 * source - stream to read interleaved input samples from
 * sink   - stream to write interleaved output samples to
 * frames - number of frames to process
 *
 * The work is split into chunks that do not wrap in the sink buffer. The
 * source is read through circular buffer 0, so only the sink needs this
 * explicit wrap handling. Within a chunk each channel is filtered on its own
 * with a stride of one frame.
 */
static void dcblock_s16_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int16 *rd_base = (ae_int16 *)source->r_ptr;
	ae_int16 *wr_base = (ae_int16 *)sink->w_ptr;
	ae_int16 *rd;
	ae_int16 *wr;
	ae_int32x2 coef, x_prev, y_prev, x_cur;
	ae_int16x4 loaded, packed;
	int chan, pos, chunk;
	int nch = source->channels;
	/* Byte distance between two samples of the same channel */
	const int stride = nch * sizeof(ae_int16);
	int remaining = nch * frames;

	dcblock_set_circular(source);
	while (remaining) {
		/* Largest run of samples that fits before the sink wraps */
		chunk = audio_stream_samples_without_wrap_s16(sink, wr_base);
		chunk = MIN(chunk, remaining);
		for (chan = 0; chan < nch; chan++) {
			rd = rd_base + chan;
			wr = wr_base + chan;
			/* Work on local copies of this channel's filter state */
			coef = cd->R_coeffs[chan];
			x_prev = cd->state[chan].x_prev;
			y_prev = cd->state[chan].y_prev;
			for (pos = 0; pos < chunk; pos += nch) {
				/* Circular load of one 16 bit sample */
				AE_L16_XC(loaded, rd, stride);
				/* Place the sample in the high half of a 32 bit lane */
				x_cur = AE_CVT32X2F16_32(loaded);
				y_prev = dcblock_cal(coef, x_prev, y_prev, x_cur);
				x_prev = x_cur;
				/* Round the 32 bit result back to 16 bits and store it */
				packed = AE_ROUND16X4F32SSYM(y_prev, y_prev);
				AE_S16_0_XP(packed, wr, stride);
			}
			/* Keep the state for the next chunk or the next call */
			cd->state[chan].x_prev = x_prev;
			cd->state[chan].y_prev = y_prev;
		}
		remaining -= chunk;
		wr_base = audio_stream_wrap(sink, wr_base + chunk);
		rd_base = audio_stream_wrap(source, rd_base + chunk);
	}
}
#endif /* CONFIG_FORMAT_S16LE */

#if CONFIG_FORMAT_S24LE
/*
 * DC blocking filter for 24 bit samples carried in 32 bit words.
 *
 * dev    - component device, its private data holds the per channel filter
 *          state (x_prev, y_prev) and the R coefficients
 * source - stream to read interleaved input samples from
 * sink   - stream to write interleaved output samples to
 * frames - number of frames to process
 *
 * The work is split into chunks that do not wrap in the sink buffer. The
 * source is read through circular buffer 0, so only the sink needs this
 * explicit wrap handling. Samples are scaled up by 8 bits for the filter
 * calculation; the filter state is kept in that scaled-up form. The output
 * is rounded back to 24 bits and clamped to the signed 24 bit range.
 */
static void dcblock_s24_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int32 *src = (ae_int32 *)source->r_ptr;
	ae_int32 *dst = (ae_int32 *)sink->w_ptr;
	ae_int32 *in;
	ae_int32 *out;
	ae_int32x2 R, state_x, state_y;
	ae_int32x2 in_sample, out_sample;
	int ch, i, n;
	int nch = source->channels;
	/* Byte distance between two samples of the same channel */
	const int inc = nch * sizeof(ae_int32);
	int samples = nch * frames;

	dcblock_set_circular(source);
	while (samples) {
		/* Largest run of samples that fits before the sink wraps */
		n = audio_stream_samples_without_wrap_s24(sink, dst);
		n = MIN(n, samples);
		for (ch = 0; ch < nch; ch++) {
			in = src + ch;
			out = dst + ch;
			state_x = cd->state[ch].x_prev;
			state_y = cd->state[ch].y_prev;
			R = cd->R_coeffs[ch];
			for (i = 0; i < n; i += nch) {
				/* Circular load of one sample */
				AE_L32_XC(in_sample, in, inc);
				/* Move the 24 bit sample to the top of the 32 bit word */
				in_sample = AE_SLAI32(in_sample, 8);
				state_y = dcblock_cal(R, state_x, state_y, in_sample);
				state_x = in_sample;
				/* Round the result down to 24 bits */
				out_sample = AE_SRAI32R(state_y, 8);
				/*
				 * Clamp to the signed 24 bit range: the saturating
				 * left shift limits the value, the right shift
				 * restores the scale with sign extension. The right
				 * shift must not round, otherwise a value saturated
				 * to 0x7fffffff becomes 0x00800000, one above the
				 * 24 bit maximum.
				 */
				out_sample = AE_SLAI32S(out_sample, 8);
				out_sample = AE_SRAI32(out_sample, 8);
				AE_S32_L_XP(out_sample, out, inc);
			}
			/* Keep the state for the next chunk or the next call */
			cd->state[ch].x_prev = state_x;
			cd->state[ch].y_prev = state_y;
		}
		samples -= n;
		dst = audio_stream_wrap(sink, dst + n);
		src = audio_stream_wrap(source, src + n);
	}
}
#endif /* CONFIG_FORMAT_S24LE */

#if CONFIG_FORMAT_S32LE
/*
 * DC blocking filter for 32 bit samples.
 *
 * dev    - component device, its private data holds the per channel filter
 *          state (x_prev, y_prev) and the R coefficients
 * source - stream to read interleaved input samples from
 * sink   - stream to write interleaved output samples to
 * frames - number of frames to process
 *
 * The work is split into chunks that do not wrap in the sink buffer. The
 * source is read through circular buffer 0, so only the sink needs this
 * explicit wrap handling. The filter output is stored as is, no format
 * conversion is needed for 32 bit samples.
 */
static void dcblock_s32_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int32 *rd_base = (ae_int32 *)source->r_ptr;
	ae_int32 *wr_base = (ae_int32 *)sink->w_ptr;
	ae_int32 *rd;
	ae_int32 *wr;
	ae_int32x2 coef, x_prev, y_prev;
	ae_int32x2 x_cur;
	int chan, pos, chunk;
	int nch = source->channels;
	/* Byte distance between two samples of the same channel */
	const int stride = nch * sizeof(ae_int32);
	int remaining = nch * frames;

	dcblock_set_circular(source);
	while (remaining) {
		/* Largest run of samples that fits before the sink wraps */
		chunk = audio_stream_samples_without_wrap_s32(sink, wr_base);
		chunk = MIN(chunk, remaining);
		for (chan = 0; chan < nch; chan++) {
			rd = rd_base + chan;
			wr = wr_base + chan;
			/* Work on local copies of this channel's filter state */
			coef = cd->R_coeffs[chan];
			x_prev = cd->state[chan].x_prev;
			y_prev = cd->state[chan].y_prev;
			for (pos = 0; pos < chunk; pos += nch) {
				/* Circular load of one sample */
				AE_L32_XC(x_cur, rd, stride);
				y_prev = dcblock_cal(coef, x_prev, y_prev, x_cur);
				x_prev = x_cur;
				AE_S32_L_XP(y_prev, wr, stride);
			}
			/* Keep the state for the next chunk or the next call */
			cd->state[chan].x_prev = x_prev;
			cd->state[chan].y_prev = y_prev;
		}
		remaining -= chunk;
		wr_base = audio_stream_wrap(sink, wr_base + chunk);
		rd_base = audio_stream_wrap(source, rd_base + chunk);
	}
}
#endif /* CONFIG_FORMAT_S32LE */

/*
 * Table that pairs each supported source frame format with the HiFi3
 * processing function for that format. An entry is only present when the
 * matching CONFIG_FORMAT_* option is enabled, so the table may hold fewer
 * than three entries.
 */
const struct dcblock_func_map dcblock_fnmap[] = {
/* { SOURCE_FORMAT , PROCESSING FUNCTION } */
#if CONFIG_FORMAT_S16LE
{ SOF_IPC_FRAME_S16_LE, dcblock_s16_default },
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
{ SOF_IPC_FRAME_S24_4LE, dcblock_s24_default },
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
{ SOF_IPC_FRAME_S32_LE, dcblock_s32_default },
#endif /* CONFIG_FORMAT_S32LE */
};

/* Number of entries in dcblock_fnmap */
const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap);
#endif
2 changes: 2 additions & 0 deletions src/include/sof/audio/dcblock/dcblock.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# include <xtensa/config/core-isa.h>
# if XCHAL_HAVE_HIFI4
# define DCBLOCK_HIFI4
# elif XCHAL_HAVE_HIFI3
# define DCBLOCK_HIFI3
# else
# define DCBLOCK_GENERIC
# endif
Expand Down
1 change: 1 addition & 0 deletions zephyr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ zephyr_library_sources_ifdef(CONFIG_COMP_ASRC
zephyr_library_sources_ifdef(CONFIG_COMP_DCBLOCK
${SOF_AUDIO_PATH}/dcblock/dcblock_generic.c
${SOF_AUDIO_PATH}/dcblock/dcblock.c
${SOF_AUDIO_PATH}/dcblock/dcblock_hifi3.c
${SOF_AUDIO_PATH}/dcblock/dcblock_hifi4.c
)

Expand Down

0 comments on commit abd81a1

Please sign in to comment.