From abd81a13836f278fade0987875abd62951f10d8f Mon Sep 17 00:00:00 2001 From: Andrula Song Date: Thu, 16 Mar 2023 15:41:30 +0800 Subject: [PATCH] Audio: Dcblock: Add HiFi3 implementation of dcblock Add HiFi3 implementation of dcblock processing functions. Compared with the generic C version, the HiFi3 version saves about 48.1% of the cycles for the 16 bit format, 48.4% for the 24 bit format and 52.6% for the 32 bit format. Signed-off-by: Andrula Song --- src/audio/CMakeLists.txt | 2 +- src/audio/dcblock/CMakeLists.txt | 1 + src/audio/dcblock/dcblock_hifi3.c | 193 ++++++++++++++++++++++++ src/include/sof/audio/dcblock/dcblock.h | 2 + zephyr/CMakeLists.txt | 1 + 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/audio/dcblock/dcblock_hifi3.c diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt index 414ee88a1b3b..50aa1a0e9793 100644 --- a/src/audio/CMakeLists.txt +++ b/src/audio/CMakeLists.txt @@ -172,7 +172,7 @@ set(src_sources src/src.c src/src_generic.c) set(asrc_sources asrc/asrc.c asrc/asrc_farrow.c asrc/asrc_farrow_generic.c) set(eq-fir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_fir/eq_fir.c eq_fir/eq_fir_generic.c) set(eq-iir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_iir/eq_iir.c) -set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi4.c) +set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi3.c dcblock/dcblock_hifi4.c) set(crossover_sources crossover/crossover.c crossover/crossover_generic.c) set(tdfb_sources tdfb/tdfb.c tdfb/tdfb_generic.c tdfb/tdfb_direction.c) set(drc_sources drc/drc.c drc/drc_generic.c drc/drc_math_generic.c) diff --git a/src/audio/dcblock/CMakeLists.txt b/src/audio/dcblock/CMakeLists.txt index 5655330ab8aa..9f883f11daaf 100644 --- a/src/audio/dcblock/CMakeLists.txt +++ b/src/audio/dcblock/CMakeLists.txt @@ -1,3 +1,4 @@ add_local_sources(sof dcblock.c) add_local_sources(sof dcblock_generic.c) +add_local_sources(sof 
dcblock_hifi3.c) add_local_sources(sof dcblock_hifi4.c) diff --git a/src/audio/dcblock/dcblock_hifi3.c b/src/audio/dcblock/dcblock_hifi3.c new file mode 100644 index 000000000000..df18d6d1a01d --- /dev/null +++ b/src/audio/dcblock/dcblock_hifi3.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2022 Intel Corporation. All rights reserved. +// +// Author: Andrula Song + +#include +#include +#include +#include + +#ifdef DCBLOCK_HIFI3 + +#include +LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL); + +static inline ae_int32x2 dcblock_cal(ae_int32x2 R, ae_int32x2 state_x, ae_int32x2 state_y, + ae_int32x2 sample) +{ + ae_int64 out, temp; + + /* R is Q2.30 and y_prev is Q1.31, so the product is Q2.62 */ + temp = AE_MULF32S_LL(R, state_y); + out = AE_SUB64(AE_MOVAD32_L(sample), AE_MOVAD32_L(state_x)); + /* shift (sample - x_prev) left by 31 to Q2.62 and add R * y_prev */ + out = AE_ADD64S(AE_SLAI64S(out, 31), temp); + /* shift the sum left by 1 to Q1.63 for the 32 bit result */ + return AE_ROUND32F64SSYM(AE_SLAI64S(out, 1)); +} + +/* Set up circular buffer 0 over the source stream buffer */ +static inline void dcblock_set_circular(const struct audio_stream __sparse_cache *source) +{ + /* Set source as circular buffer 0 */ + AE_SETCBEGIN0(source->addr); + AE_SETCEND0(source->end_addr); +} + +#if CONFIG_FORMAT_S16LE +static void dcblock_s16_default(const struct comp_dev *dev, + const struct audio_stream __sparse_cache *source, + const struct audio_stream __sparse_cache *sink, + uint32_t frames) +{ + struct comp_data *cd = comp_get_drvdata(dev); + ae_int16 *src = (ae_int16 *)source->r_ptr; + ae_int16 *dst = (ae_int16 *)sink->w_ptr; + ae_int16 *in; + ae_int16 *out; + ae_int32x2 R, state_x, state_y, sample; + ae_int16x4 in_sample, out_sample; + int ch, i, n; + int nch = source->channels; + const int inc = nch * sizeof(ae_int16); + int samples = nch * frames; + + dcblock_set_circular(source); + while (samples) { + n = audio_stream_samples_without_wrap_s16(sink, dst); + n = MIN(n, samples); + for (ch = 0; ch < nch; ch++) { + in = src + ch; + out = dst + ch; + 
state_x = cd->state[ch].x_prev; + state_y = cd->state[ch].y_prev; + R = cd->R_coeffs[ch]; + for (i = 0; i < n; i += nch) { + /* Load a 16 bit sample */ + AE_L16_XC(in_sample, in, inc); + /* Store the 16 bit sample in the high half of a 32 bit register */ + sample = AE_CVT32X2F16_32(in_sample); + state_y = dcblock_cal(R, state_x, state_y, sample); + state_x = sample; + out_sample = AE_ROUND16X4F32SSYM(state_y, state_y); + AE_S16_0_XP(out_sample, out, inc); + } + cd->state[ch].x_prev = state_x; + cd->state[ch].y_prev = state_y; + } + samples -= n; + dst = audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); + } +} +#endif /* CONFIG_FORMAT_S16LE */ + +#if CONFIG_FORMAT_S24LE +static void dcblock_s24_default(const struct comp_dev *dev, + const struct audio_stream __sparse_cache *source, + const struct audio_stream __sparse_cache *sink, + uint32_t frames) +{ + struct comp_data *cd = comp_get_drvdata(dev); + ae_int32 *src = (ae_int32 *)source->r_ptr; + ae_int32 *dst = (ae_int32 *)sink->w_ptr; + ae_int32 *in; + ae_int32 *out; + ae_int32x2 R, state_x, state_y; + ae_int32x2 in_sample, out_sample; + int ch, i, n; + int nch = source->channels; + const int inc = nch * sizeof(ae_int32); + int samples = nch * frames; + + dcblock_set_circular(source); + while (samples) { + n = audio_stream_samples_without_wrap_s24(sink, dst); + n = MIN(n, samples); + for (ch = 0; ch < nch; ch++) { + in = src + ch; + out = dst + ch; + state_x = cd->state[ch].x_prev; + state_y = cd->state[ch].y_prev; + R = cd->R_coeffs[ch]; + for (i = 0; i < n; i += nch) { + AE_L32_XC(in_sample, in, inc); + in_sample = AE_SLAI32(in_sample, 8); + state_y = dcblock_cal(R, state_x, state_y, in_sample); + state_x = in_sample; + out_sample = AE_SRAI32R(state_y, 8); + out_sample = AE_SLAI32S(out_sample, 8); + out_sample = AE_SRAI32R(out_sample, 8); + AE_S32_L_XP(out_sample, out, inc); + } + cd->state[ch].x_prev = state_x; + cd->state[ch].y_prev = state_y; + } + samples -= n; + dst = 
audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); + } +} +#endif /* CONFIG_FORMAT_S24LE */ + +#if CONFIG_FORMAT_S32LE +static void dcblock_s32_default(const struct comp_dev *dev, + const struct audio_stream __sparse_cache *source, + const struct audio_stream __sparse_cache *sink, + uint32_t frames) +{ + struct comp_data *cd = comp_get_drvdata(dev); + ae_int32 *src = (ae_int32 *)source->r_ptr; + ae_int32 *dst = (ae_int32 *)sink->w_ptr; + ae_int32 *in; + ae_int32 *out; + ae_int32x2 R, state_x, state_y; + ae_int32x2 in_sample; + int ch, i, n; + int nch = source->channels; + const int inc = nch * sizeof(ae_int32); + int samples = nch * frames; + + dcblock_set_circular(source); + while (samples) { + n = audio_stream_samples_without_wrap_s32(sink, dst); + n = MIN(n, samples); + for (ch = 0; ch < nch; ch++) { + in = src + ch; + out = dst + ch; + state_x = cd->state[ch].x_prev; + state_y = cd->state[ch].y_prev; + R = cd->R_coeffs[ch]; + for (i = 0; i < n; i += nch) { + AE_L32_XC(in_sample, in, inc); + state_y = dcblock_cal(R, state_x, state_y, in_sample); + state_x = in_sample; + AE_S32_L_XP(state_y, out, inc); + } + cd->state[ch].x_prev = state_x; + cd->state[ch].y_prev = state_y; + } + samples -= n; + dst = audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); + } +} +#endif /* CONFIG_FORMAT_S32LE */ + +const struct dcblock_func_map dcblock_fnmap[] = { +/* { SOURCE_FORMAT , PROCESSING FUNCTION } */ +#if CONFIG_FORMAT_S16LE + { SOF_IPC_FRAME_S16_LE, dcblock_s16_default }, +#endif /* CONFIG_FORMAT_S16LE */ +#if CONFIG_FORMAT_S24LE + { SOF_IPC_FRAME_S24_4LE, dcblock_s24_default }, +#endif /* CONFIG_FORMAT_S24LE */ +#if CONFIG_FORMAT_S32LE + { SOF_IPC_FRAME_S32_LE, dcblock_s32_default }, +#endif /* CONFIG_FORMAT_S32LE */ +}; + +const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap); +#endif diff --git a/src/include/sof/audio/dcblock/dcblock.h b/src/include/sof/audio/dcblock/dcblock.h index 74dc5b8b50be..ca9662904de9 
100644 --- a/src/include/sof/audio/dcblock/dcblock.h +++ b/src/include/sof/audio/dcblock/dcblock.h @@ -18,6 +18,8 @@ # include # if XCHAL_HAVE_HIFI4 # define DCBLOCK_HIFI4 +# elif XCHAL_HAVE_HIFI3 +# define DCBLOCK_HIFI3 # else # define DCBLOCK_GENERIC # endif diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt index e950ce6206c3..685224210708 100644 --- a/zephyr/CMakeLists.txt +++ b/zephyr/CMakeLists.txt @@ -467,6 +467,7 @@ zephyr_library_sources_ifdef(CONFIG_COMP_ASRC zephyr_library_sources_ifdef(CONFIG_COMP_DCBLOCK ${SOF_AUDIO_PATH}/dcblock/dcblock_generic.c ${SOF_AUDIO_PATH}/dcblock/dcblock.c + ${SOF_AUDIO_PATH}/dcblock/dcblock_hifi3.c ${SOF_AUDIO_PATH}/dcblock/dcblock_hifi4.c )