Skip to content

Commit

Permalink
Math: IIR DF1: Optimize IIR core for Xtensa HiFi5
Browse files Browse the repository at this point in the history
This patch adds iir_df1_hifi5.c that is a modified version
of iir_df1_hifi4.c. The coefficients and data load is 128 bits
when possible. The data load is fastest non-aligned, so the
iir->delay address needs to be 128 bits / 16 bytes aligned.

The updated version saves 2.1 MCPS in a sof-testbench4 run, from
10.4 to 8.3 MCPS, for the 10th order filter used. The test run
command for the HiFi5 build of sof-testbench4 was
"scripts/sof-testbench-helper.sh -x -m eqiir".

Signed-off-by: Seppo Ingalsuo <[email protected]>
  • Loading branch information
singalsu authored and kv2019i committed Dec 3, 2024
1 parent 427c8fc commit df1cf7e
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ add_local_sources_ifdef(CONFIG_MATH_IIR_DF2T sof
iir_df2t_generic.c iir_df2t_hifi3.c iir_df2t.c)

add_local_sources_ifdef(CONFIG_MATH_IIR_DF1 sof
iir_df1_generic.c iir_df1_hifi3.c iir_df1_hifi4.c iir_df1.c)
iir_df1_generic.c iir_df1_hifi3.c iir_df1_hifi4.c iir_df1_hifi5.c iir_df1.c)

if(CONFIG_MATH_WINDOW)
add_local_sources(sof window.c)
Expand Down
2 changes: 1 addition & 1 deletion src/math/iir_df1_hifi4.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include <rtos/symbol.h>

#if SOF_USE_MIN_HIFI(4, FILTER)
#if SOF_USE_HIFI(4, FILTER)

/*
* Direct form I second order filter block (biquad)
Expand Down
119 changes: 119 additions & 0 deletions src/math/iir_df1_hifi5.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022-2024 Intel Corporation.
//
// Author: Seppo Ingalsuo <[email protected]>

#include <stdint.h>
#include <stddef.h>
#include <errno.h>
#include <sof/audio/format.h>
#include <sof/math/iir_df1.h>
#include <user/eq.h>
#include <sof/common.h>

#include <rtos/symbol.h>

#if SOF_USE_MIN_HIFI(5, FILTER)

/*
* Direct form I second order filter block (biquad)
*
* +----+ +---+ +-------+
* X(z) ---o--->| b0 |---> + --+-------------o--->| g |--->| shift |---> Y(z)
* | +----+ ^ ^ | +---+ +-------+
* | | | |
* +------+ | | +------+
* | z^-1 | | | | z^-1 |
* +------+ | | +------+
* | +----+ | | +----+ |
* o--->| b1 |---> + + <---| a1 |---o
* | +----+ ^ ^ +----+ |
* | | | |
* +------+ | | +------+
* | z^-1 | | | | z^-1 |
* +------+ | | +------+
* | ^ ^ |
* | +----+ | | +----+ |
* o--->| b2 |---> + +<--- | a2 |---o
* +----+ +----+
*
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* The a1 and a2 coefficients have been negated during calculation, so their terms are added.
*/

/* Series DF1 IIR */

/* 32 bit data, 32 bit coefficients and 32 bit state variables */

/**
 * Filter one sample through the biquads described by an IIR state.
 *
 * The biquads are processed in groups of iir->biquads_in_series. Every group
 * starts from the same input sample x, each biquad in the group is fed with
 * the output of the previous one, and the outputs of the groups are summed
 * into the return value with AE_F32_ADDS_F32().
 *
 * @param iir Filter state. The function reads iir->coef, iir->biquads and
 *            iir->biquads_in_series, and reads and updates iir->delay.
 * @param x   Input sample.
 * @return    Sum of the group outputs, or x unchanged when iir->biquads is
 *            zero (bypass).
 *
 * NOTE(review): the outer loop advances by iir->biquads_in_series, so a zero
 * value together with a non-zero iir->biquads would never terminate. The
 * caller is assumed to set biquads_in_series >= 1 - confirm.
 */
int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
{
ae_valignx2 coef_align;
ae_valignx2 data_r_align;
ae_valignx2 data_w_align = AE_ZALIGN128();
ae_f64 acc;
ae_int32x2 delay_y2y1;
ae_int32x2 delay_x2x1;
ae_int32x2 coef_a2a1;
ae_int32x2 coef_b2b1;
ae_int32x2 coef_b0;
ae_int32x2 gain;
ae_int32x2 shift;
ae_int32 in;
ae_int32 out = 0;
ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
/* Read and write pointers both start from the beginning of iir->delay, each
 * biquad's delay values are stored back to where they were loaded from.
 */
ae_int32x4 *delay_r = (ae_int32x4 *)iir->delay;
ae_int32x4 *delay_w = delay_r;
int i;
int j;
int nseries = iir->biquads_in_series;

/* Bypass is set with number of biquads set to zero. */
if (!iir->biquads)
return x;

/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
/* Delay order in iir->delay[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */

/* Prime the alignment register for the delay line load stream once, the
 * delay values are then loaded 128 bits at a time for each biquad.
 */
data_r_align = AE_LA128_PP(delay_r);
for (j = 0; j < iir->biquads; j += nseries) {
/* Every group of biquads in series starts from the same input sample */
in = x;
for (i = 0; i < nseries; i++) {
/* Load data */
AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);

/* Load coefficients */
/* The alignment register is primed for every biquad. Presumably this is
 * needed because the three 32 bit loads below advance coefp by 12 bytes
 * after the 128 bit load - confirm against the HiFi5 ISA reference.
 */
coef_align = AE_LA128_PP(coefp);
AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
AE_L32_IP(gain, (ae_int32 *)coefp, 4);

acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */

/* Store data */
AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);

/* Apply gain */
acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */

/* Apply biquad output shift right parameter and then
 * round and saturate to 32 bits Q1.31.
 */
acc = AE_SRAA64(acc, shift);
in = AE_ROUND32F48SSYM(acc);
}
/* Output of previous section is in variable in */
out = AE_F32_ADDS_F32(out, in);
}

/* Finish the delay line store stream. Presumably this writes out data that
 * is still pending in data_w_align - confirm against the HiFi5 ISA reference.
 */
AE_SA128POS_FP(data_w_align, delay_w);
return out;
}
EXPORT_SYMBOL(iir_df1);

#endif
1 change: 1 addition & 0 deletions test/cmocka/src/audio/eq_iir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ add_library(audio_for_eq_iir STATIC
${PROJECT_SOURCE_DIR}/src/math/iir_df1_generic.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_hifi3.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_hifi4.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_hifi5.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t_generic.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t_hifi3.c
Expand Down
1 change: 1 addition & 0 deletions zephyr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ zephyr_library_sources_ifdef(CONFIG_MATH_IIR_DF1
${SOF_MATH_PATH}/iir_df1_generic.c
${SOF_MATH_PATH}/iir_df1_hifi3.c
${SOF_MATH_PATH}/iir_df1_hifi4.c
${SOF_MATH_PATH}/iir_df1_hifi5.c
${SOF_MATH_PATH}/iir_df1.c
)

Expand Down

0 comments on commit df1cf7e

Please sign in to comment.