From db6909322b7258ae62e24eae81f38559892147a0 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 22 Oct 2025 15:10:20 +0100 Subject: [PATCH 01/19] Implement BASOP W_min and W_max functions. --- lib_basop/enh64.c | 97 +++++++++++++++++++++++++++++++++++++++++++- lib_basop/enh64.h | 2 + lib_debug/wmc_auto.c | 2 +- lib_debug/wmc_auto.h | 2 + 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index 8bffb620c..b53adb79a 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -40,7 +40,103 @@ *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR +/*______________________________________________________________________________ +| | +| Function Name : W_min | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the minimum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + if ( L64_var1 <= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_min++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + +/*______________________________________________________________________________ +| | +| Function Name : W_max | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the maximum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 >= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_max++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} /*___________________________________________________________________________ | | @@ -84,7 +180,6 @@ Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ) return L64_var_out; } - /*___________________________________________________________________________ | | | Function Name : W_sub_nosat | diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index c3896bb0d..d690708da 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,6 +21,8 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR +Word64 W_min( Word64 L64_var1, Word64 L64_var2 ); +Word64 W_max( Word64 L64_var1, Word64 L64_var2 ); Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); diff --git a/lib_debug/wmc_auto.c b/lib_debug/wmc_auto.c index 5afd9de16..9d4d573f0 100644 --- a/lib_debug/wmc_auto.c +++ b/lib_debug/wmc_auto.c @@ -133,7 +133,7 @@ static BASIC_OP op_weight = { #ifdef ENH_64_BIT_OPERATOR /* Weights of new 64 bit basops */ , - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 #endif /* #ifdef ENH_64_BIT_OPERATOR */ #ifdef ENH_32_BIT_OPERATOR diff --git a/lib_debug/wmc_auto.h b/lib_debug/wmc_auto.h index 64e2c751a..6dff36f50 100644 --- a/lib_debug/wmc_auto.h +++ b/lib_debug/wmc_auto.h @@ -877,6 +877,8 @@ typedef struct /* New 64 bit basops */ #ifdef ENH_64_BIT_OPERATOR unsigned int move64; /* Complexity Weight of 1 */ + unsigned int W_min; /* Complexity Weight of 1 */ + unsigned int W_max; /* Complexity Weight of 1 */ unsigned int W_add_nosat; /* Complexity Weight of 1 */ unsigned int W_sub_nosat; /* Complexity Weight of 1 */ unsigned int W_shl; /* Complexity Weight of 1 */ -- GitLab From 837da740df1c1e4271842c656a31dda5c1672f74 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 12:12:47 +0000 Subject: [PATCH 02/19] Addressed Thomas Dettbarn's comments. --- lib_basop/enh64.c | 97 ------------------------------------------ lib_basop/enh64.h | 104 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 99 deletions(-) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index b53adb79a..7812f5af0 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -40,103 +40,6 @@ *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR -/*______________________________________________________________________________ -| | -| Function Name : W_min | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the minimum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | -|______________________________________________________________________________| -*/ -Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 <= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_min++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} - -/*______________________________________________________________________________ -| | -| Function Name : W_max | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the maximum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | -|______________________________________________________________________________| -*/ -Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 >= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_max++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} /*___________________________________________________________________________ | | diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index d690708da..ab21d5b0c 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,8 +21,108 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR -Word64 W_min( Word64 L64_var1, Word64 L64_var2 ); -Word64 W_max( Word64 L64_var1, Word64 L64_var2 ); + + +/*______________________________________________________________________________ +| | +| Function Name : W_min | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the minimum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +static __inline Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 <= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_min++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + +/*______________________________________________________________________________ +| | +| Function Name : W_max | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the maximum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +static __inline Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 >= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_max++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); -- GitLab From 024b3128dfee95ca8cc869625aae47673a70f8be Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 12:15:21 +0000 Subject: [PATCH 03/19] Revert changes in enh64.c. --- lib_basop/enh64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index 7812f5af0..8bffb620c 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -41,6 +41,7 @@ #ifdef ENH_64_BIT_OPERATOR + /*___________________________________________________________________________ | | | Function Name : W_add_nosat | @@ -83,6 +84,7 @@ Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ) return L64_var_out; } + /*___________________________________________________________________________ | | | Function Name : W_sub_nosat | -- GitLab From a791a1528846174d2c55887b248b0fd3b05cf1a2 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Thu, 6 Nov 2025 10:02:44 +0000 Subject: [PATCH 04/19] Optimize eig2x2_fx part 1. --- lib_dec/ivas_ism_metadata_dec_fx.c | 2 +- .../ivas_dirac_dec_binaural_functions_fx.c | 131 ++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_ism_metadata_dec_fx.c b/lib_dec/ivas_ism_metadata_dec_fx.c index 54cc5da15..4d3c06b9c 100644 --- a/lib_dec/ivas_ism_metadata_dec_fx.c +++ b/lib_dec/ivas_ism_metadata_dec_fx.c @@ -30,7 +30,6 @@ *******************************************************************************************************/ -#include "move.h" #include #include "options.h" #include "ivas_cnst.h" @@ -40,6 +39,7 @@ #include "ivas_stat_enc.h" #include #include "wmc_auto.h" +#include "move.h" #include "ivas_prot_fx.h" diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 3211b0e46..c689c449a 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3518,6 +3518,31 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } +#if 0 +static void check( + Word32 computed_fx, + Word16 computed_q, + Word32 expected_fx, + Word16 expected_q, + Word32 max_abs_err ); + +static void check( + Word32 computed_fx, + Word16 computed_q, + Word32 expected_fx, + Word16 expected_q, + Word32 max_abs_err ) +{ + Word16 qd = computed_q - expected_q; + Word32 cf = computed_fx >> +max( qd, 0 ); + Word32 ef = expected_fx >> -min( qd, 0 ); + Word32 abs_error = abs( cf - ef ); + if ( abs_error >= max_abs_err ) + { + assert( false ); + } +} +#endif static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ @@ -3532,6 +3557,7 @@ static void eig2x2_fx( Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { +#if 0 Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -3672,7 +3698,112 @@ static void eig2x2_fx( *q_D = sub( q_tmp2, 1 ); move16(); } +#else + Word16 chA, chB, ch; + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Ure_fx[chA][chB] = 0; + move32(); + Uim_fx[chA][chB] = 0; + move32(); + } + } + // =================================================================================================== + /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) + a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx + pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) + add_fx = 0.5f * (e1 + e2)*/ + + Word16 q1, q2, qm, qd, lshift; + + // (e1 - e2)^2 -> Q: 2 * q_E + q1 = shl( q_E, 1 ); + // 4 * ((c_re * c_re) + (c_im * c_im)) -> Q: 2 * q_C - 2 + q2 = sub( shl( q_C, 1 ), 2 ); + + // (e1 - e2)^2 + Word32 es = L_sub( E1_fx, E2_fx ); + Word64 es2 = W_mult0_32_32( es, es ); + lshift = sub( W_norm( es2 ), 1 ); + es2 = W_shl( es2, lshift ); + q1 = add( q1, lshift ); + if ( !es2 ) + { + q1 = 63; + move16(); + } + + // 4 * ((c_re * c_re) + (c_im * c_im)) + Word64 cs = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); // 2*q_C-2 + lshift = sub( W_norm( cs ), 1 ); + cs = W_shl( cs, lshift ); + q2 = add( q2, lshift ); + if ( !cs ) + { + q2 = 63; + move16(); + } + + Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME + Word16 q_crossSquare = 2 * q_C + lshift - 32; // FIXME + + // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx) + qm = s_min( q1, q2 ); + qd = sub( q1, q2 ); + Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shr( cs, negate( s_min( qd, 0 ) ) ) ), 0 ); + + // pm = 0.5f * sqrtf(a) + // a = 0.5f * ( E1 + E2 ); + lshift = W_norm( a ); + Word32 pm = W_extract_h( W_shl( a, lshift ) ); + Word16 e = sub( sub( 63, lshift ), qm ); + pm = L_shr( Sqrt32( pm, &e ), 1 ); + q2 = sub( 31, e ); + // check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); + a = L_add( E1_fx, E2_fx ); + lshift = sub( norm_l( a ), 1 ); + a = W_shl( a, lshift ); + q1 = add( add( q_E, 1 ), lshift ); + // check( a, q1, add_fx, q_tmp1, 1 << 16 ); + + Word32 add_fx = a; // FIXME + Word16 q_tmp1 = q1; // FIXME + Word32 pm_fx = pm; // FIXME + Word16 q_tmp2 = q2; // FIXME + + // D[0] = add + pm; + // D[1] = max( 0.0f, add - pm ); + qm = s_min( q1, q2 ); + qd = sub( q1, q2 ); + a = W_shr( a, s_max( qd, 0 ) ); + pm = W_shr( pm, negate( s_min( qd, 0 ) ) ); + Word32 d0 = L_add( a, pm ); + Word32 d1 = L_max( L_sub( a, pm ), 0 ); + // check( d0, qm, D_fx[0], *q_D, 1 << 16 ); + // check( d1, qm, D_fx[1], *q_D, 1 << 16 ); + + D_fx[0] = d0; // FIXME + D_fx[1] = d1; // FIXME + *q_D = qm; // FIXME + + Word32 tmp1, tmp2, tmp3, e1, e2, s_fx, normVal_fx, c_re, c_im; // FIXME + Word16 q_U_1, q_U_2, q_c, q_e, exp, exp_tmp3; // FIXME + Word32 epsilon_mant = 1180591621; // FIXME + Word16 epsilon_exp = -39; // FIXME + + exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); // FIXME + c_re = L_shl( Cre_fx, exp ); // FIXME + c_im = L_shl( Cim_fx, exp ); // FIXME + q_c = add( q_C, exp ); // FIXME + + exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); // FIXME + e1 = L_shl( E1_fx, exp ); // FIXME + e2 = L_shl( E2_fx, exp ); // FIXME + q_e = add( q_E, exp ); // FIXME +#endif /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) -- GitLab From f4d6f56eb196c8dd629d0e7013da5f737c35a32d Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Thu, 6 Nov 2025 17:51:15 +0000 Subject: [PATCH 05/19] Fix MSVC build failure. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index c689c449a..4ddbdcc45 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3763,13 +3763,13 @@ static void eig2x2_fx( pm = L_shr( Sqrt32( pm, &e ), 1 ); q2 = sub( 31, e ); // check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); - a = L_add( E1_fx, E2_fx ); - lshift = sub( norm_l( a ), 1 ); - a = W_shl( a, lshift ); + Word32 ea = L_add( E1_fx, E2_fx ); + lshift = sub( norm_l( ea ), 1 ); + ea = L_shl( ea, lshift ); q1 = add( add( q_E, 1 ), lshift ); - // check( a, q1, add_fx, q_tmp1, 1 << 16 ); + // check( ea, q1, add_fx, q_tmp1, 1 << 16 ); - Word32 add_fx = a; // FIXME + Word32 add_fx = ea; // FIXME Word16 q_tmp1 = q1; // FIXME Word32 pm_fx = pm; // FIXME Word16 q_tmp2 = q2; // FIXME -- GitLab From db555574e65c07102d845662be1e0531cb3ee055 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Thu, 6 Nov 2025 17:54:13 +0000 Subject: [PATCH 06/19] Apply clang format. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4ddbdcc45..4fc279344 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3769,7 +3769,7 @@ static void eig2x2_fx( q1 = add( add( q_E, 1 ), lshift ); // check( ea, q1, add_fx, q_tmp1, 1 << 16 ); - Word32 add_fx = ea; // FIXME + Word32 add_fx = ea; // FIXME Word16 q_tmp1 = q1; // FIXME Word32 pm_fx = pm; // FIXME Word16 q_tmp2 = q2; // FIXME -- GitLab From 9219bfbcf5650e3bb98a4ec81a0a1f1ad3ba647d Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Thu, 6 Nov 2025 18:08:14 +0000 Subject: [PATCH 07/19] Fix MSVC build failure. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4fc279344..b79a62c26 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3778,10 +3778,10 @@ static void eig2x2_fx( // D[1] = max( 0.0f, add - pm ); qm = s_min( q1, q2 ); qd = sub( q1, q2 ); - a = W_shr( a, s_max( qd, 0 ) ); - pm = W_shr( pm, negate( s_min( qd, 0 ) ) ); - Word32 d0 = L_add( a, pm ); - Word32 d1 = L_max( L_sub( a, pm ), 0 ); + ea = L_shr( ea, s_max( qd, 0 ) ); + pm = L_shr( pm, negate( s_min( qd, 0 ) ) ); + Word32 d0 = L_add( ea, pm ); + Word32 d1 = L_max( L_sub( ea, pm ), 0 ); // check( d0, qm, D_fx[0], *q_D, 1 << 16 ); // check( d1, qm, D_fx[1], *q_D, 1 << 16 ); -- GitLab From 236a187258b5cbf78dfcf68e5e1d5a5f35c717af Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Fri, 7 Nov 2025 10:34:34 +0000 Subject: [PATCH 08/19] Optimize eig2x2_fx part 2. --- .../ivas_dirac_dec_binaural_functions_fx.c | 94 +++++++++++++++++-- 1 file changed, 85 insertions(+), 9 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index b79a62c26..5f939ae4c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3518,7 +3518,7 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } -#if 0 +#if 1 static void check( Word32 computed_fx, Word16 computed_q, @@ -3557,7 +3557,7 @@ static void eig2x2_fx( Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { -#if 0 + // #if 0 Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -3698,7 +3698,8 @@ static void eig2x2_fx( *q_D = sub( q_tmp2, 1 ); move16(); } -#else +// #else +#if 0 Word16 chA, chB, ch; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -3710,6 +3711,7 @@ static void eig2x2_fx( move32(); } } +#endif // =================================================================================================== /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) @@ -3747,13 +3749,16 @@ static void eig2x2_fx( move16(); } +#if 0 Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME Word16 q_crossSquare = 2 * q_C + lshift - 32; // FIXME +#endif + Word16 csq = 2 * q_C + lshift; // FIXME // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx) qm = s_min( q1, q2 ); qd = sub( q1, q2 ); - Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shr( cs, negate( s_min( qd, 0 ) ) ) ), 0 ); + Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shl( cs, s_min( qd, 0 ) ) ), 0 ); // pm = 0.5f * sqrtf(a) // a = 0.5f * ( E1 + E2 ); @@ -3762,29 +3767,32 @@ static void eig2x2_fx( Word16 e = sub( sub( 63, lshift ), qm ); pm = L_shr( Sqrt32( pm, &e ), 1 ); q2 = sub( 31, e ); - // check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); + check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); Word32 ea = L_add( E1_fx, E2_fx ); lshift = sub( norm_l( ea ), 1 ); ea = L_shl( ea, lshift ); q1 = add( add( q_E, 1 ), lshift ); - // check( ea, q1, add_fx, q_tmp1, 1 << 16 ); + check( ea, q1, add_fx, q_tmp1, 1 << 16 ); +#if 0 Word32 add_fx = ea; // FIXME Word16 q_tmp1 = q1; // FIXME Word32 pm_fx = pm; // FIXME Word16 q_tmp2 = q2; // FIXME +#endif // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); qm = s_min( q1, q2 ); qd = sub( q1, q2 ); ea = L_shr( ea, s_max( qd, 0 ) ); - pm = L_shr( pm, negate( s_min( qd, 0 ) ) ); + pm = L_shl( pm, s_min( qd, 0 ) ); Word32 d0 = L_add( ea, pm ); Word32 d1 = L_max( L_sub( ea, pm ), 0 ); - // check( d0, qm, D_fx[0], *q_D, 1 << 16 ); - // check( d1, qm, D_fx[1], *q_D, 1 << 16 ); + check( d0, qm, D_fx[0], *q_D, 1 << 16 ); + check( d1, qm, D_fx[1], *q_D, 1 << 16 ); +#if 0 D_fx[0] = d0; // FIXME D_fx[1] = d1; // FIXME *q_D = qm; // FIXME @@ -3804,6 +3812,8 @@ static void eig2x2_fx( e2 = L_shl( E2_fx, exp ); // FIXME q_e = add( q_E, exp ); // FIXME #endif + // #endif + /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) @@ -3875,6 +3885,64 @@ static void eig2x2_fx( move16(); } + // D[ch] - E1 + // D[ch] - E2 + + Word32 arg0, arg1, arg2; + Word32 z1, z2, ad, s; + + qm = s_min( *q_D, q_e ); + qd = sub( *q_D, q_e ); + + arg0 = L_shr( D_fx[ch], s_max( qd, 0 ) ); + arg1 = L_shl( e1, s_min( qd, 0 ) ); + arg2 = L_shl( e2, s_min( qd, 0 ) ); + z1 = L_sub( arg0, arg1 ); + z2 = L_sub( arg0, arg2 ); + + check( z1, qm, tmp1, q_tmp1, 1 << 1 ); + check( z2, qm, tmp2, q_tmp1, 1 << 1 ); + + ad = L_sub( L_abs( z2 ), L_abs( z1 ) ); + if ( ad > 0 ) + { + s = z2; + } + if ( ad <= 0 ) + { + s = z1; + } + + Word64 epsm = ( (Word64) 1180591621 ) << 32; // FIXME + Word16 epsq = 63 - epsilon_exp; // FIXME + + Word16 sq = qm; + Word16 s2q = L_shl( sq, 1 ); + qm = s_min( s2q, csq ); + qm = s_min( qm, epsq ); + + Word64 nval64m; + Word64 nval32m; + Word16 nvalq, nvale; + nval64m = W_shr( W_mult0_32_32( s, s ), L_sub( s2q, qm ) ); + nval64m = W_add( nval64m, W_shr( cs, L_sub( csq, qm ) ) ); + nval64m = W_add( nval64m, W_shr( epsm, L_sub( epsq, qm ) ) ); + lshift = W_norm( nval64m ); + nval64m = W_shl( nval64m, lshift ); + nvalq = add( qm, lshift ); + + Word32 test0m = W_extract_h( nval64m ); // FIXME + Word32 test0q = nvalq; // FIXME + + nval32m = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m ), &nvale ); + nvale = sub( nvale, sub( Q30, nvalq - 32 ) ); + nval32m = Sqrt32( nval32m, &nvale ); + nvalq = sub( 31, nvale ); + + Word32 test1m = nval32m; // FIXME + Word32 test1q = nvalq; // FIXME + + IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { s_fx = tmp2; @@ -3888,11 +3956,15 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove + tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); + check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove + q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { @@ -3959,11 +4031,15 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove + tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); + check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove + q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { -- GitLab From 890cd054d1bfd87de7fc961a5762c9d44857f863 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 10 Nov 2025 10:11:42 +0000 Subject: [PATCH 09/19] Optimize eig2x2_fx part 3. --- .../ivas_dirac_dec_binaural_functions_fx.c | 612 +++++++++++++----- 1 file changed, 433 insertions(+), 179 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 5f939ae4c..77b7b67c6 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3537,13 +3537,366 @@ static void check( Word32 cf = computed_fx >> +max( qd, 0 ); Word32 ef = expected_fx >> -min( qd, 0 ); Word32 abs_error = abs( cf - ef ); - if ( abs_error >= max_abs_err ) + if ( abs_error > max_abs_err ) { assert( false ); } } #endif +Word32 __pm_fx; +Word16 __pm_q; +Word32 __add_fx; +Word16 __add_q; + +Word32 __as[BINAURAL_CHANNELS]; + +Word32 __s_fx[BINAURAL_CHANNELS]; +Word16 __s_q[BINAURAL_CHANNELS]; + +Word32 __nval_fx[BINAURAL_CHANNELS]; +Word16 __nval_q[BINAURAL_CHANNELS]; + +Word32 __Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; +Word32 __Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; +Word16 __U_q; + +Word32 __D_fx[BINAURAL_CHANNELS]; +Word16 __D_q; + +Word32 __diff_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; +Word16 __diff_q; + +static void eig2x2_opt( + const Word32 E1_fx, /*q_E*/ + const Word32 E2_fx, /*q_E*/ + Word16 q_E, + const Word32 Cre_fx, /*q_C*/ + const Word32 Cim_fx, /*q_C*/ + Word16 q_C ) +{ + Word32 s_fx[BINAURAL_CHANNELS]; + Word32 pm_fx, add_fx; + Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; + Word64 crossSquare_fx, tmp3, tmp4; + Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; + Word32 i01, i00, i11, i10; + Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; + Word16 eps_q = 63 - EPSILON_EXP; + move32(); + move16(); + + set32_fx( (Word32 *) __Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + set32_fx( (Word32 *) __Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + __Ure_fx[0][0] = ONE_IN_Q30; + move32(); + __Ure_fx[1][1] = ONE_IN_Q30; + move32(); + __U_q = Q30; + move16(); + + exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); + c_re = L_shl( Cre_fx, exp ); + c_im = L_shl( Cim_fx, exp ); + q_c = add( q_C, exp ); + + exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); + e1 = L_shl( E1_fx, exp ); + e2 = L_shl( E2_fx, exp ); + q_e = add( q_E, exp ); + + // crossSquare_fx = (c_re * c_re) + (c_im * c_im) + // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx + // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) + // add_fx = 0.5f * (e1 + e2) + + tmp1 = L_sub( e1, e2 ); + tmp3 = W_mult_32_32( tmp1, tmp1 ); + q_tmp1 = add( add( q_e, q_e ), 1 ); + if ( !tmp3 ) + { + q_tmp1 = 63; + move16(); + } + + crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im ); + q_crossSquare = add( add( q_c, q_c ), 1 ); + if ( !crossSquare_fx ) + { + q_crossSquare = 63; + move16(); + } + + tmp4 = crossSquare_fx; + move64(); + q_tmp2 = sub( q_crossSquare, 2 ); + if ( !tmp4 ) + { + q_tmp2 = 63; + move16(); + } + + q_diff = sub( q_tmp1, q_tmp2 ); + q_tmp1 = s_min( q_tmp1, q_tmp2 ); + if ( q_diff > 0 ) + { + tmp3 = W_shr( tmp3, q_diff ); + } + if ( q_diff < 0 ) + { + tmp4 = W_shl( tmp4, q_diff ); + } + tmp3 = W_add( tmp3, tmp4 ); + q_diff = W_norm( tmp3 ); + tmp3 = W_shl( tmp3, q_diff ); + q_tmp1 = add( q_tmp1, q_diff ); + + // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) + exp = sub( 63, q_tmp1 ); + pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp ); + pm_fx = L_shr( pm_fx, 1 ); + q_tmp2 = sub( 31, exp ); + + // add_fx = 0.5 * (e1 + e2) + add_fx = L_shr( L_add( e1, e2 ), 1 ); + q_tmp1 = q_e; + move16(); + + __pm_fx = pm_fx; // FIXME + __pm_q = q_tmp2; // FIXME + __add_fx = add_fx; // FIXME + __add_q = q_tmp1; // FIXME + + // D[0] = add + pm; + // D[1] = max( 0.0f, add - pm ); + + q_diff = sub( q_tmp1, q_tmp2 ); + + tmp1 = add_fx; + move32(); + if ( q_diff > 0 ) + { + tmp1 = L_shr( tmp1, q_diff ); + } + + tmp2 = pm_fx; + move32(); + if ( q_diff < 0 ) + { + tmp2 = L_shl( tmp2, q_diff ); + } + + __D_fx[0] = L_add( tmp1, tmp2 ); + move32(); + __D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); + move32(); + __D_q = s_min( q_tmp1, q_tmp2 ); + move32(); + + // Numeric case, when input is practically zeros + // if ( __D_fx[0] < EPSILON_FX ) + + if ( LT_32( L_shl_sat( __D_fx[0], sub( 31 - EPSILON_EXP, __D_q ) ), EPSILON_MANT ) ) + { + return; + } + + // Numeric case, when input is near an identity matrix with a gain + tmp1 = Mpy_32_32( INV_1000_Q31, add_fx ); + if ( q_diff > 0 ) + { + tmp1 = L_shr( tmp1, q_diff ); + } + + if ( LT_32( tmp2, tmp1 ) ) + { + return; + } + + // Eigenvectors + + q_diff = sub( q_e, __D_q ); + q_tmp1 = s_min( q_e, __D_q ); + + tmp1 = __D_fx[0]; + move32(); + if ( q_diff > 0 ) + { + tmp1 = L_shr( tmp1, q_diff ); + } + + tmp2 = __D_fx[1]; + move32(); + if ( q_diff > 0 ) + { + tmp2 = L_shr( tmp2, q_diff ); + } + + if ( q_diff < 0 ) + { + e1 = L_shl( e1, q_diff ); + } + + if ( q_diff < 0 ) + { + e2 = L_shl( e2, q_diff ); + } + + s_fx[0] = L_sub( tmp1, e1 ); // __D_fx[0] - e1 + tmp1 = L_sub( tmp1, e2 ); // __D_fx[0] - e2 + s_fx[1] = L_sub( tmp2, e1 ); // __D_fx[1] - e1 + tmp2 = L_sub( tmp2, e2 ); // __D_fx[1] - e2 + + __diff_fx[0][0] = s_fx[0]; + __diff_fx[0][1] = tmp1; + __diff_fx[1][0] = s_fx[1]; + __diff_fx[1][1] = tmp2; + + __diff_q = q_tmp1; + + i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( __D_fx[0] - e2 ) > fabsf( __D_fx[0] - e1 ) + i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( __D_fx[1] - e2 ) > fabsf( __D_fx[1] - e1 ) + + __as[0] = i01; + __as[1] = i11; + + if ( i01 ) + { + s_fx[0] = tmp1; + move32(); + } + + if ( i11 ) + { + s_fx[1] = tmp2; + move32(); + } + + __s_fx[0] = s_fx[0]; + __s_fx[1] = s_fx[1]; + __s_q[0] = q_tmp1; + __s_q[1] = q_tmp1; + + // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); + + Word32 nvalm[BINAURAL_CHANNELS]; + Word16 nvalq[BINAURAL_CHANNELS]; + + q_tmp2 = L_shl( q_tmp1, 1 ); + q_min = s_min( q_tmp2, q_crossSquare ); + q_min = s_min( q_min, eps_q ); + + Word64 nval64m[BINAURAL_CHANNELS]; + + q_diff = L_sub( q_tmp2, q_min ); + nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); + nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); + + q_diff = L_sub( q_crossSquare, q_min ); + crossSquare_fx = W_shr( crossSquare_fx, q_diff ); + nval64m[0] = W_add( nval64m[0], crossSquare_fx ); + nval64m[1] = W_add( nval64m[1], crossSquare_fx ); + + q_diff = L_sub( eps_q, q_min ); + eps_fx = W_shr( eps_fx, q_diff ); + nval64m[0] = W_add( nval64m[0], eps_fx ); + nval64m[1] = W_add( nval64m[1], eps_fx ); + + q_diff = W_norm( nval64m[0] ); + nval64m[0] = W_shl( nval64m[0], q_diff ); + nvalq[0] = add( q_min, q_diff ); + + q_diff = W_norm( nval64m[1] ); + nval64m[1] = W_shl( nval64m[1], q_diff ); + nvalq[1] = add( q_min, q_diff ); + + nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); + exp = sub( exp, sub( 62, nvalq[0] ) ); + nvalm[0] = Sqrt32( nvalm[0], &exp ); + nvalq[0] = sub( 31, exp ); + + nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); + exp = sub( exp, sub( 62, nvalq[1] ) ); + nvalm[1] = Sqrt32( nvalm[1], &exp ); + nvalq[1] = sub( 31, exp ); + + __nval_fx[0] = nvalm[0]; + __nval_q[0] = nvalq[0]; + __nval_fx[1] = nvalm[1]; + __nval_q[1] = nvalq[1]; + + q_diff = sub( q_c, q_tmp1 ); + q_tmp1 = s_min( q_tmp1, q_c ); + + if ( q_diff > 0 ) + { + c_re = L_shr( c_re, q_diff ); + } + + if ( q_diff > 0 ) + { + c_im = L_shr( c_im, q_diff ); + } + + if ( q_diff < 0 ) + { + s_fx[0] = L_shl( s_fx[0], q_diff ); + } + + if ( q_diff < 0 ) + { + s_fx[1] = L_shl( s_fx[1], q_diff ); + } + + q_diff = sub( nvalq[0], nvalq[1] ); + q_tmp2 = s_min( nvalq[0], nvalq[1] ); + + if ( q_diff > 0 ) + { + nvalm[0] = L_shr( nvalm[0], q_diff ); + } + + if ( q_diff < 0 ) + { + nvalm[1] = L_shl( nvalm[1], q_diff ); + } + + __U_q = sub( add( q_tmp1, q_tmp2 ), 31 ); + + i00 = L_sub( 1, i01 ); + i10 = L_sub( 1, i11 ); + + c0_im = c_im; + move32(); + if ( i00 > 0 ) + { + c0_im = L_negate( c0_im ); + } + + c1_im = c_im; + move32(); + if ( i10 > 0 ) + { + c1_im = L_negate( c1_im ); + } + + __Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); + move32(); + __Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); + move32(); + __Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); + move32(); + + __Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); + move32(); + __Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); + move32(); + __Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); + move32(); + + return; +} + static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ @@ -3557,7 +3910,23 @@ static void eig2x2_fx( Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { - // #if 0 +#if 0 + eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); + Ure_fx[0][0] = __Ure_fx[0][0]; + Ure_fx[0][1] = __Ure_fx[0][1]; + Ure_fx[1][0] = __Ure_fx[1][0]; + Ure_fx[1][1] = __Ure_fx[1][1]; + Uim_fx[0][0] = __Uim_fx[0][0]; + Uim_fx[0][1] = __Uim_fx[0][1]; + Uim_fx[1][0] = __Uim_fx[1][0]; + Uim_fx[1][1] = __Uim_fx[1][1]; + *q_U = __U_q; + D_fx[0] = __D_fx[0]; + D_fx[1] = __D_fx[1]; + *q_D = __D_q; +#else + eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); + Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -3680,6 +4049,9 @@ static void eig2x2_fx( q_tmp1 = q_e; move16(); + check( __pm_fx, __pm_q, pm_fx, q_tmp2, 1 << 26 ); + check( __add_fx, __add_q, add_fx, q_tmp1, 1 << 26 ); + IF( LT_16( q_tmp1, q_tmp2 ) ) { D_fx[0] = L_add( L_shr( add_fx, 1 ), L_shr( pm_fx, add( sub( q_tmp2, q_tmp1 ), 1 ) ) ); @@ -3698,121 +4070,9 @@ static void eig2x2_fx( *q_D = sub( q_tmp2, 1 ); move16(); } -// #else -#if 0 - Word16 chA, chB, ch; - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) - { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) - { - Ure_fx[chA][chB] = 0; - move32(); - Uim_fx[chA][chB] = 0; - move32(); - } - } -#endif - // =================================================================================================== - /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) - a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx - pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - add_fx = 0.5f * (e1 + e2)*/ - - Word16 q1, q2, qm, qd, lshift; - - // (e1 - e2)^2 -> Q: 2 * q_E - q1 = shl( q_E, 1 ); - // 4 * ((c_re * c_re) + (c_im * c_im)) -> Q: 2 * q_C - 2 - q2 = sub( shl( q_C, 1 ), 2 ); - - // (e1 - e2)^2 - Word32 es = L_sub( E1_fx, E2_fx ); - Word64 es2 = W_mult0_32_32( es, es ); - lshift = sub( W_norm( es2 ), 1 ); - es2 = W_shl( es2, lshift ); - q1 = add( q1, lshift ); - if ( !es2 ) - { - q1 = 63; - move16(); - } - - // 4 * ((c_re * c_re) + (c_im * c_im)) - Word64 cs = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); // 2*q_C-2 - lshift = sub( W_norm( cs ), 1 ); - cs = W_shl( cs, lshift ); - q2 = add( q2, lshift ); - if ( !cs ) - { - q2 = 63; - move16(); - } - -#if 0 - Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME - Word16 q_crossSquare = 2 * q_C + lshift - 32; // FIXME -#endif - Word16 csq = 2 * q_C + lshift; // FIXME - - // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx) - qm = s_min( q1, q2 ); - qd = sub( q1, q2 ); - Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shl( cs, s_min( qd, 0 ) ) ), 0 ); - - // pm = 0.5f * sqrtf(a) - // a = 0.5f * ( E1 + E2 ); - lshift = W_norm( a ); - Word32 pm = W_extract_h( W_shl( a, lshift ) ); - Word16 e = sub( sub( 63, lshift ), qm ); - pm = L_shr( Sqrt32( pm, &e ), 1 ); - q2 = sub( 31, e ); - check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); - Word32 ea = L_add( E1_fx, E2_fx ); - lshift = sub( norm_l( ea ), 1 ); - ea = L_shl( ea, lshift ); - q1 = add( add( q_E, 1 ), lshift ); - check( ea, q1, add_fx, q_tmp1, 1 << 16 ); - -#if 0 - Word32 add_fx = ea; // FIXME - Word16 q_tmp1 = q1; // FIXME - Word32 pm_fx = pm; // FIXME - Word16 q_tmp2 = q2; // FIXME -#endif - - // D[0] = add + pm; - // D[1] = max( 0.0f, add - pm ); - qm = s_min( q1, q2 ); - qd = sub( q1, q2 ); - ea = L_shr( ea, s_max( qd, 0 ) ); - pm = L_shl( pm, s_min( qd, 0 ) ); - Word32 d0 = L_add( ea, pm ); - Word32 d1 = L_max( L_sub( ea, pm ), 0 ); - check( d0, qm, D_fx[0], *q_D, 1 << 16 ); - check( d1, qm, D_fx[1], *q_D, 1 << 16 ); - -#if 0 - D_fx[0] = d0; // FIXME - D_fx[1] = d1; // FIXME - *q_D = qm; // FIXME - - Word32 tmp1, tmp2, tmp3, e1, e2, s_fx, normVal_fx, c_re, c_im; // FIXME - Word16 q_U_1, q_U_2, q_c, q_e, exp, exp_tmp3; // FIXME - Word32 epsilon_mant = 1180591621; // FIXME - Word16 epsilon_exp = -39; // FIXME - - exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); // FIXME - c_re = L_shl( Cre_fx, exp ); // FIXME - c_im = L_shl( Cim_fx, exp ); // FIXME - q_c = add( q_C, exp ); // FIXME - - exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); // FIXME - e1 = L_shl( E1_fx, exp ); // FIXME - e2 = L_shl( E2_fx, exp ); // FIXME - q_e = add( q_E, exp ); // FIXME -#endif - // #endif + check( __D_fx[0], __D_q, D_fx[0], *q_D, 1 << 26 ); + check( __D_fx[1], __D_q, D_fx[1], *q_D, 1 << 26 ); /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) @@ -3826,6 +4086,15 @@ static void eig2x2_fx( *q_U = Q31; move16(); + for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) + { + for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) + { + check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); + check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); + } + } + return; } @@ -3843,6 +4112,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); + for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) + { + for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) + { + check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); + check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); + } + } + return; } } @@ -3857,6 +4135,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); + for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) + { + for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) + { + check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); + check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); + } + } + return; } } @@ -3885,68 +4172,18 @@ static void eig2x2_fx( move16(); } - // D[ch] - E1 - // D[ch] - E2 - - Word32 arg0, arg1, arg2; - Word32 z1, z2, ad, s; - - qm = s_min( *q_D, q_e ); - qd = sub( *q_D, q_e ); - - arg0 = L_shr( D_fx[ch], s_max( qd, 0 ) ); - arg1 = L_shl( e1, s_min( qd, 0 ) ); - arg2 = L_shl( e2, s_min( qd, 0 ) ); - z1 = L_sub( arg0, arg1 ); - z2 = L_sub( arg0, arg2 ); - - check( z1, qm, tmp1, q_tmp1, 1 << 1 ); - check( z2, qm, tmp2, q_tmp1, 1 << 1 ); - - ad = L_sub( L_abs( z2 ), L_abs( z1 ) ); - if ( ad > 0 ) - { - s = z2; - } - if ( ad <= 0 ) - { - s = z1; - } - - Word64 epsm = ( (Word64) 1180591621 ) << 32; // FIXME - Word16 epsq = 63 - epsilon_exp; // FIXME - - Word16 sq = qm; - Word16 s2q = L_shl( sq, 1 ); - qm = s_min( s2q, csq ); - qm = s_min( qm, epsq ); - - Word64 nval64m; - Word64 nval32m; - Word16 nvalq, nvale; - nval64m = W_shr( W_mult0_32_32( s, s ), L_sub( s2q, qm ) ); - nval64m = W_add( nval64m, W_shr( cs, L_sub( csq, qm ) ) ); - nval64m = W_add( nval64m, W_shr( epsm, L_sub( epsq, qm ) ) ); - lshift = W_norm( nval64m ); - nval64m = W_shl( nval64m, lshift ); - nvalq = add( qm, lshift ); - - Word32 test0m = W_extract_h( nval64m ); // FIXME - Word32 test0q = nvalq; // FIXME - - nval32m = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m ), &nvale ); - nvale = sub( nvale, sub( Q30, nvalq - 32 ) ); - nval32m = Sqrt32( nval32m, &nvale ); - nvalq = sub( 31, nvale ); - - Word32 test1m = nval32m; // FIXME - Word32 test1q = nvalq; // FIXME - + check( __diff_fx[ch][0], __diff_q, tmp1, q_tmp1, 1 << 26 ); + check( __diff_fx[ch][1], __diff_q, tmp2, q_tmp1, 1 << 26 ); IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { + check( __as[ch], 0, 1, 0, 0 ); + s_fx = tmp2; move32(); + + check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); + exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); @@ -3956,14 +4193,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); - check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove - tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove + check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) @@ -3988,6 +4223,10 @@ static void eig2x2_fx( move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); + check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_1, 1 << 26 ); + check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_1, 1 << 26 ); + check( __Uim_fx[1][ch], __U_q, Uim_fx[1][ch], q_U_1, 1 << 26 ); + IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); @@ -4019,9 +4258,13 @@ static void eig2x2_fx( } ELSE { + check( __as[ch], 0, 0, 0, 0 ); + s_fx = tmp1; move32(); + check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); + exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); @@ -4031,14 +4274,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); - check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove - tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove + check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) @@ -4063,6 +4304,10 @@ static void eig2x2_fx( move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); + check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_2, 1 << 26 ); + check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_2, 1 << 26 ); + check( __Uim_fx[0][ch], __U_q, Uim_fx[0][ch], q_U_2, 1 << 26 ); + IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); @@ -4106,6 +4351,15 @@ static void eig2x2_fx( move16(); } + for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) + { + for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) + { + check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 26 ); + check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 26 ); + } + } +#endif return; } -- GitLab From 76fc3d302e735243d59e521717db453dba125d41 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 12 Nov 2025 09:36:34 +0000 Subject: [PATCH 10/19] Finalize optimization. --- lib_basop/enh64.h | 102 -------- lib_com/options.h | 6 + lib_debug/wmc_auto.c | 2 +- lib_debug/wmc_auto.h | 2 - .../ivas_dirac_dec_binaural_functions_fx.c | 237 +++--------------- 5 files changed, 42 insertions(+), 307 deletions(-) diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index ab21d5b0c..c3896bb0d 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,108 +21,6 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR - - -/*______________________________________________________________________________ -| | -| Function Name : W_min | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the minimum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | -|______________________________________________________________________________| -*/ -static __inline Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 <= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_min++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} - - -/*______________________________________________________________________________ -| | -| Function Name : W_max | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the maximum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | -|______________________________________________________________________________| -*/ -static __inline Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 >= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_max++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} - - Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); diff --git a/lib_com/options.h b/lib_com/options.h index 10ea208a2..eb655ce24 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -159,4 +159,10 @@ /* #################### End BASOP porting switches ############################ */ +/* #################### Start BASOP optimization switches ############################ */ + +#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ + +/* #################### End BASOP optimization switches ############################ */ + #endif diff --git a/lib_debug/wmc_auto.c b/lib_debug/wmc_auto.c index 9d4d573f0..5afd9de16 100644 --- a/lib_debug/wmc_auto.c +++ b/lib_debug/wmc_auto.c @@ -133,7 +133,7 @@ static BASIC_OP op_weight = { #ifdef ENH_64_BIT_OPERATOR /* Weights of new 64 bit basops */ , - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 #endif /* #ifdef ENH_64_BIT_OPERATOR */ #ifdef ENH_32_BIT_OPERATOR diff --git a/lib_debug/wmc_auto.h b/lib_debug/wmc_auto.h index 6dff36f50..64e2c751a 100644 --- a/lib_debug/wmc_auto.h +++ b/lib_debug/wmc_auto.h @@ -877,8 +877,6 @@ typedef struct /* New 64 bit basops */ #ifdef ENH_64_BIT_OPERATOR unsigned int move64; /* Complexity Weight of 1 */ - unsigned int W_min; /* Complexity Weight of 1 */ - unsigned int W_max; /* Complexity Weight of 1 */ unsigned int W_add_nosat; /* Complexity Weight of 1 */ unsigned int W_sub_nosat; /* Complexity Weight of 1 */ unsigned int W_shl; /* Complexity Weight of 1 */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 77b7b67c6..6bbb57349 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3518,63 +3518,20 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } -#if 1 -static void check( - Word32 computed_fx, - Word16 computed_q, - Word32 expected_fx, - Word16 expected_q, - Word32 max_abs_err ); - -static void check( - Word32 computed_fx, - Word16 computed_q, - Word32 expected_fx, - Word16 expected_q, - Word32 max_abs_err ) -{ - Word16 qd = computed_q - expected_q; - Word32 cf = computed_fx >> +max( qd, 0 ); - Word32 ef = expected_fx >> -min( qd, 0 ); - Word32 abs_error = abs( cf - ef ); - if ( abs_error > max_abs_err ) - { - assert( false ); - } -} -#endif - -Word32 __pm_fx; -Word16 __pm_q; -Word32 __add_fx; -Word16 __add_q; - -Word32 __as[BINAURAL_CHANNELS]; - -Word32 __s_fx[BINAURAL_CHANNELS]; -Word16 __s_q[BINAURAL_CHANNELS]; - -Word32 __nval_fx[BINAURAL_CHANNELS]; -Word16 __nval_q[BINAURAL_CHANNELS]; - -Word32 __Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; -Word32 __Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; -Word16 __U_q; - -Word32 __D_fx[BINAURAL_CHANNELS]; -Word16 __D_q; - -Word32 __diff_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; -Word16 __diff_q; - -static void eig2x2_opt( +static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ Word16 q_E, const Word32 Cre_fx, /*q_C*/ const Word32 Cim_fx, /*q_C*/ - Word16 q_C ) + Word16 q_C, + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word16 *q_U, + Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ + Word16 *q_D ) { +#ifdef NONBE_OPT_2193_EIG2X2 Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; @@ -3586,13 +3543,13 @@ static void eig2x2_opt( move32(); move16(); - set32_fx( (Word32 *) __Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - set32_fx( (Word32 *) __Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - __Ure_fx[0][0] = ONE_IN_Q30; + set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + Ure_fx[0][0] = ONE_IN_Q30; move32(); - __Ure_fx[1][1] = ONE_IN_Q30; + Ure_fx[1][1] = ONE_IN_Q30; move32(); - __U_q = Q30; + *q_U = Q30; move16(); exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); @@ -3662,11 +3619,6 @@ static void eig2x2_opt( q_tmp1 = q_e; move16(); - __pm_fx = pm_fx; // FIXME - __pm_q = q_tmp2; // FIXME - __add_fx = add_fx; // FIXME - __add_q = q_tmp1; // FIXME - // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); @@ -3686,17 +3638,17 @@ static void eig2x2_opt( tmp2 = L_shl( tmp2, q_diff ); } - __D_fx[0] = L_add( tmp1, tmp2 ); + D_fx[0] = L_add( tmp1, tmp2 ); move32(); - __D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); + D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); move32(); - __D_q = s_min( q_tmp1, q_tmp2 ); + *q_D = s_min( q_tmp1, q_tmp2 ); move32(); // Numeric case, when input is practically zeros - // if ( __D_fx[0] < EPSILON_FX ) + // if ( D_fx[0] < EPSILON_FX ) - if ( LT_32( L_shl_sat( __D_fx[0], sub( 31 - EPSILON_EXP, __D_q ) ), EPSILON_MANT ) ) + if ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) { return; } @@ -3715,17 +3667,17 @@ static void eig2x2_opt( // Eigenvectors - q_diff = sub( q_e, __D_q ); - q_tmp1 = s_min( q_e, __D_q ); + q_diff = sub( q_e, *q_D ); + q_tmp1 = s_min( q_e, *q_D ); - tmp1 = __D_fx[0]; + tmp1 = D_fx[0]; move32(); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } - tmp2 = __D_fx[1]; + tmp2 = D_fx[1]; move32(); if ( q_diff > 0 ) { @@ -3742,23 +3694,13 @@ static void eig2x2_opt( e2 = L_shl( e2, q_diff ); } - s_fx[0] = L_sub( tmp1, e1 ); // __D_fx[0] - e1 - tmp1 = L_sub( tmp1, e2 ); // __D_fx[0] - e2 - s_fx[1] = L_sub( tmp2, e1 ); // __D_fx[1] - e1 - tmp2 = L_sub( tmp2, e2 ); // __D_fx[1] - e2 - - __diff_fx[0][0] = s_fx[0]; - __diff_fx[0][1] = tmp1; - __diff_fx[1][0] = s_fx[1]; - __diff_fx[1][1] = tmp2; - - __diff_q = q_tmp1; - - i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( __D_fx[0] - e2 ) > fabsf( __D_fx[0] - e1 ) - i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( __D_fx[1] - e2 ) > fabsf( __D_fx[1] - e1 ) + s_fx[0] = L_sub( tmp1, e1 ); // D_fx[0] - e1 + tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 + s_fx[1] = L_sub( tmp2, e1 ); // D_fx[1] - e1 + tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 - __as[0] = i01; - __as[1] = i11; + i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) + i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) if ( i01 ) { @@ -3772,11 +3714,6 @@ static void eig2x2_opt( move32(); } - __s_fx[0] = s_fx[0]; - __s_fx[1] = s_fx[1]; - __s_q[0] = q_tmp1; - __s_q[1] = q_tmp1; - // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); Word32 nvalm[BINAURAL_CHANNELS]; @@ -3820,11 +3757,6 @@ static void eig2x2_opt( nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); - __nval_fx[0] = nvalm[0]; - __nval_q[0] = nvalq[0]; - __nval_fx[1] = nvalm[1]; - __nval_q[1] = nvalq[1]; - q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); @@ -3861,7 +3793,7 @@ static void eig2x2_opt( nvalm[1] = L_shl( nvalm[1], q_diff ); } - __U_q = sub( add( q_tmp1, q_tmp2 ), 31 ); + *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); i00 = L_sub( 1, i01 ); i10 = L_sub( 1, i11 ); @@ -3880,53 +3812,20 @@ static void eig2x2_opt( c1_im = L_negate( c1_im ); } - __Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); + Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); move32(); - __Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); + Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); move32(); - __Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); + Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); move32(); - __Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); + Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); move32(); - __Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); + Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); move32(); - __Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); + Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); move32(); - - return; -} - -static void eig2x2_fx( - const Word32 E1_fx, /*q_E*/ - const Word32 E2_fx, /*q_E*/ - Word16 q_E, - const Word32 Cre_fx, /*q_C*/ - const Word32 Cim_fx, /*q_C*/ - Word16 q_C, - Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ - Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ - Word16 *q_U, - Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ - Word16 *q_D ) -{ -#if 0 - eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); - Ure_fx[0][0] = __Ure_fx[0][0]; - Ure_fx[0][1] = __Ure_fx[0][1]; - Ure_fx[1][0] = __Ure_fx[1][0]; - Ure_fx[1][1] = __Ure_fx[1][1]; - Uim_fx[0][0] = __Uim_fx[0][0]; - Uim_fx[0][1] = __Uim_fx[0][1]; - Uim_fx[1][0] = __Uim_fx[1][0]; - Uim_fx[1][1] = __Uim_fx[1][1]; - *q_U = __U_q; - D_fx[0] = __D_fx[0]; - D_fx[1] = __D_fx[1]; - *q_D = __D_q; #else - eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); - Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -4049,9 +3948,6 @@ static void eig2x2_fx( q_tmp1 = q_e; move16(); - check( __pm_fx, __pm_q, pm_fx, q_tmp2, 1 << 26 ); - check( __add_fx, __add_q, add_fx, q_tmp1, 1 << 26 ); - IF( LT_16( q_tmp1, q_tmp2 ) ) { D_fx[0] = L_add( L_shr( add_fx, 1 ), L_shr( pm_fx, add( sub( q_tmp2, q_tmp1 ), 1 ) ) ); @@ -4071,9 +3967,6 @@ static void eig2x2_fx( move16(); } - check( __D_fx[0], __D_q, D_fx[0], *q_D, 1 << 26 ); - check( __D_fx[1], __D_q, D_fx[1], *q_D, 1 << 26 ); - /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) @@ -4086,15 +3979,6 @@ static void eig2x2_fx( *q_U = Q31; move16(); - for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) - { - for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) - { - check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); - check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); - } - } - return; } @@ -4112,15 +3996,6 @@ static void eig2x2_fx( *q_U = Q30; move16(); - for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) - { - for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) - { - check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); - check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); - } - } - return; } } @@ -4135,15 +4010,6 @@ static void eig2x2_fx( *q_U = Q30; move16(); - for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) - { - for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) - { - check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); - check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); - } - } - return; } } @@ -4172,18 +4038,10 @@ static void eig2x2_fx( move16(); } - check( __diff_fx[ch][0], __diff_q, tmp1, q_tmp1, 1 << 26 ); - check( __diff_fx[ch][1], __diff_q, tmp2, q_tmp1, 1 << 26 ); - IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { - check( __as[ch], 0, 1, 0, 0 ); - s_fx = tmp2; move32(); - - check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); - exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); @@ -4198,8 +4056,6 @@ static void eig2x2_fx( normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); - q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { @@ -4223,10 +4079,6 @@ static void eig2x2_fx( move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); - check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_1, 1 << 26 ); - check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_1, 1 << 26 ); - check( __Uim_fx[1][ch], __U_q, Uim_fx[1][ch], q_U_1, 1 << 26 ); - IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); @@ -4258,13 +4110,9 @@ static void eig2x2_fx( } ELSE { - check( __as[ch], 0, 0, 0, 0 ); - s_fx = tmp1; move32(); - check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); - exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); @@ -4279,8 +4127,6 @@ static void eig2x2_fx( normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); - q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { @@ -4304,10 +4150,6 @@ static void eig2x2_fx( move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); - check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_2, 1 << 26 ); - check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_2, 1 << 26 ); - check( __Uim_fx[0][ch], __U_q, Uim_fx[0][ch], q_U_2, 1 << 26 ); - IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); @@ -4350,15 +4192,6 @@ static void eig2x2_fx( *q_U = q_U_2; move16(); } - - for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) - { - for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) - { - check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 26 ); - check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 26 ); - } - } #endif return; } -- GitLab From 6411b45e18ac83411315d8e710045df023bb3455 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 12 Nov 2025 10:28:35 +0000 Subject: [PATCH 11/19] Apply clang format. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 6bbb57349..e3b64440b 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3592,7 +3592,7 @@ static void eig2x2_fx( q_tmp2 = 63; move16(); } - + q_diff = sub( q_tmp1, q_tmp2 ); q_tmp1 = s_min( q_tmp1, q_tmp2 ); if ( q_diff > 0 ) -- GitLab From 8d0b26d53a9cca5326201d60d1aac217d166f779 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 12 Nov 2025 10:45:35 +0000 Subject: [PATCH 12/19] Fix MSVC build failure. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index e3b64440b..61cbb2308 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3719,22 +3719,22 @@ static void eig2x2_fx( Word32 nvalm[BINAURAL_CHANNELS]; Word16 nvalq[BINAURAL_CHANNELS]; - q_tmp2 = L_shl( q_tmp1, 1 ); + q_tmp2 = shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); Word64 nval64m[BINAURAL_CHANNELS]; - q_diff = L_sub( q_tmp2, q_min ); + q_diff = sub( q_tmp2, q_min ); nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); - q_diff = L_sub( q_crossSquare, q_min ); + q_diff = sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); nval64m[0] = W_add( nval64m[0], crossSquare_fx ); nval64m[1] = W_add( nval64m[1], crossSquare_fx ); - q_diff = L_sub( eps_q, q_min ); + q_diff = sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); nval64m[0] = W_add( nval64m[0], eps_fx ); nval64m[1] = W_add( nval64m[1], eps_fx ); -- GitLab From a9eb284cd8ef43adb00b3a641124e3ffafac1938 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 19 Nov 2025 09:57:45 +0000 Subject: [PATCH 13/19] Address Markus' comments. --- lib_com/basop_util.c | 1 - lib_com/basop_util.h | 2 ++ .../ivas_dirac_dec_binaural_functions_fx.c | 24 +++++++++++++++---- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index bd55e59b6..6f4e00f45 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1041,7 +1041,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) } #endif -Word32 div_w_newton( Word32 num, Word32 den ); /* Table of 256 precalculated estimates to be used by the "div_w_newton" function using the Newton/Raphson method. diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index bfafdfacb..947c16922 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -334,6 +334,8 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ #endif +Word32 div_w_newton( Word32 num, /*!< i : Numerator*/ + Word32 den ); /*!< i : Denominator*/ Word32 BASOP_Util_Divide3232_Scale_newton( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 61cbb2308..1849e318a 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3747,13 +3747,29 @@ static void eig2x2_fx( nval64m[1] = W_shl( nval64m[1], q_diff ); nvalq[1] = add( q_min, q_diff ); - nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); - exp = sub( exp, sub( 62, nvalq[0] ) ); + // nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); + // exp = sub( exp, sub( 62, nvalq[0] ) ); + // + // is equivalent to: + // + // nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); + // exp = sub( nvalq[0], 61 ); + + nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); + exp = sub( nvalq[0], 61 ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); - nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); - exp = sub( exp, sub( 62, nvalq[1] ) ); + // nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); + // exp = sub( exp, sub( 62, nvalq[1] ) ); + // + // is equivalent to: + // + // nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); + // exp = sub( nvalq[1], 61 ); + + nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); + exp = sub( nvalq[1], 61 ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); -- GitLab From 3339c98f19d7525f7609c32d570e7e0ff7df8340 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 19 Nov 2025 11:15:30 +0000 Subject: [PATCH 14/19] Fix div_w_newton when denominator is 0x40000000. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 867677dd0..58099faca 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3492,6 +3492,19 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } +#ifdef NONBE_OPT_2193_EIG2X2 +static Word32 eig2x2_div_fx( Word32 num, Word32 den ); + +static Word32 eig2x2_div_fx( Word32 num, Word32 den ) +{ + if ( EQ_32( den, 0x40000000 ) ) + { + return num; + } + return div_w_newton( num, den ); +} +#endif + static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ @@ -3729,7 +3742,7 @@ static void eig2x2_fx( // nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); // exp = sub( nvalq[0], 61 ); - nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); + nvalm[0] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); exp = sub( nvalq[0], 61 ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); @@ -3742,7 +3755,7 @@ static void eig2x2_fx( // nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); // exp = sub( nvalq[1], 61 ); - nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); + nvalm[1] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); exp = sub( nvalq[1], 61 ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); -- GitLab From 167f8cf4fa9db13775eba8fe6a4f17ccc365ebca Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 19 Nov 2025 13:32:50 +0000 Subject: [PATCH 15/19] Address Tommy's comments. --- .../ivas_dirac_dec_binaural_functions_fx.c | 112 +++++++++--------- 1 file changed, 54 insertions(+), 58 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 58099faca..ced1f13ed 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3519,11 +3519,12 @@ static void eig2x2_fx( Word16 *q_D ) { #ifdef NONBE_OPT_2193_EIG2X2 - Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; + Word32 condition, s0_fx, s1_fx, nval0_fx, nval1_fx; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; + Word16 nval0_q, nval1_q; Word32 i01, i00, i11, i10; Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; Word16 eps_q = 63 - EPSILON_EXP; @@ -3634,8 +3635,8 @@ static void eig2x2_fx( // Numeric case, when input is practically zeros // if ( D_fx[0] < EPSILON_FX ) - - if ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) + condition = LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ); + if ( condition != 0 ) { return; } @@ -3646,8 +3647,8 @@ static void eig2x2_fx( { tmp1 = L_shr( tmp1, q_diff ); } - - if ( LT_32( tmp2, tmp1 ) ) + condition = LT_32( tmp2, tmp1 ); + if ( condition != 0 ) { return; } @@ -3681,84 +3682,79 @@ static void eig2x2_fx( e2 = L_shl( e2, q_diff ); } - s_fx[0] = L_sub( tmp1, e1 ); // D_fx[0] - e1 - tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 - s_fx[1] = L_sub( tmp2, e1 ); // D_fx[1] - e1 - tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 + s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1 + tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 + s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1 + tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 - i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) - i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) + i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) + i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) if ( i01 ) { - s_fx[0] = tmp1; + s0_fx = tmp1; move32(); } if ( i11 ) { - s_fx[1] = tmp2; + s1_fx = tmp2; move32(); } // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); - Word32 nvalm[BINAURAL_CHANNELS]; - Word16 nvalq[BINAURAL_CHANNELS]; - q_tmp2 = shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); - Word64 nval64m[BINAURAL_CHANNELS]; - q_diff = sub( q_tmp2, q_min ); - nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); - nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); + tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff ); + tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff ); q_diff = sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); - nval64m[0] = W_add( nval64m[0], crossSquare_fx ); - nval64m[1] = W_add( nval64m[1], crossSquare_fx ); + tmp3 = W_add( tmp3, crossSquare_fx ); + tmp4 = W_add( tmp4, crossSquare_fx ); q_diff = sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); - nval64m[0] = W_add( nval64m[0], eps_fx ); - nval64m[1] = W_add( nval64m[1], eps_fx ); + tmp3 = W_add( tmp3, eps_fx ); + tmp4 = W_add( tmp4, eps_fx ); - q_diff = W_norm( nval64m[0] ); - nval64m[0] = W_shl( nval64m[0], q_diff ); - nvalq[0] = add( q_min, q_diff ); + q_diff = W_norm( tmp3 ); + tmp3 = W_shl( tmp3, q_diff ); + nval0_q = add( q_min, q_diff ); - q_diff = W_norm( nval64m[1] ); - nval64m[1] = W_shl( nval64m[1], q_diff ); - nvalq[1] = add( q_min, q_diff ); + q_diff = W_norm( tmp4 ); + tmp4 = W_shl( tmp4, q_diff ); + nval1_q = add( q_min, q_diff ); - // nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); - // exp = sub( exp, sub( 62, nvalq[0] ) ); + // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp ); + // exp = sub( exp, sub( 62, nval0_q ) ); // // is equivalent to: // - // nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); - // exp = sub( nvalq[0], 61 ); + // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) ); + // exp = sub( nval0_q, 61 ); - nvalm[0] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); - exp = sub( nvalq[0], 61 ); - nvalm[0] = Sqrt32( nvalm[0], &exp ); - nvalq[0] = sub( 31, exp ); + nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) ); + exp = sub( nval0_q, 61 ); + nval0_fx = Sqrt32( nval0_fx, &exp ); + nval0_q = sub( 31, exp ); - // nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); - // exp = sub( exp, sub( 62, nvalq[1] ) ); + // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp ); + // exp = sub( exp, sub( 62, nval1_q ) ); // // is equivalent to: // - // nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); - // exp = sub( nvalq[1], 61 ); + // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) ); + // exp = sub( nval1_q, 61 ); - nvalm[1] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); - exp = sub( nvalq[1], 61 ); - nvalm[1] = Sqrt32( nvalm[1], &exp ); - nvalq[1] = sub( 31, exp ); + nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) ); + exp = sub( nval1_q, 61 ); + nval1_fx = Sqrt32( nval1_fx, &exp ); + nval1_q = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); @@ -3775,25 +3771,25 @@ static void eig2x2_fx( if ( q_diff < 0 ) { - s_fx[0] = L_shl( s_fx[0], q_diff ); + s0_fx = L_shl( s0_fx, q_diff ); } if ( q_diff < 0 ) { - s_fx[1] = L_shl( s_fx[1], q_diff ); + s1_fx = L_shl( s1_fx, q_diff ); } - q_diff = sub( nvalq[0], nvalq[1] ); - q_tmp2 = s_min( nvalq[0], nvalq[1] ); + q_diff = sub( nval0_q, nval1_q ); + q_tmp2 = s_min( nval0_q, nval1_q ); if ( q_diff > 0 ) { - nvalm[0] = L_shr( nvalm[0], q_diff ); + nval0_fx = L_shr( nval0_fx, q_diff ); } if ( q_diff < 0 ) { - nvalm[1] = L_shl( nvalm[1], q_diff ); + nval1_fx = L_shl( nval1_fx, q_diff ); } *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); @@ -3815,18 +3811,18 @@ static void eig2x2_fx( c1_im = L_negate( c1_im ); } - Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); + Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx ); move32(); - Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); + Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx ); move32(); - Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); + Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx ); move32(); - Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); + Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx ); move32(); - Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); + Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx ); move32(); - Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); + Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx ); move32(); #else Word16 chA, chB, ch; -- GitLab From 50d6a56184a18e48aa2595f1f97b57546bdc4242 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 19 Nov 2025 13:45:14 +0000 Subject: [PATCH 16/19] Address Tommy's comments. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index ced1f13ed..b6686bde1 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3497,7 +3497,7 @@ static Word32 eig2x2_div_fx( Word32 num, Word32 den ); static Word32 eig2x2_div_fx( Word32 num, Word32 den ) { - if ( EQ_32( den, 0x40000000 ) ) + IF ( EQ_32( den, 0x40000000 ) ) { return num; } @@ -3521,7 +3521,7 @@ static void eig2x2_fx( #ifdef NONBE_OPT_2193_EIG2X2 Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; - Word32 condition, s0_fx, s1_fx, nval0_fx, nval1_fx; + Word32 s0_fx, s1_fx, nval0_fx, nval1_fx; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; Word16 nval0_q, nval1_q; @@ -3635,8 +3635,8 @@ static void eig2x2_fx( // Numeric case, when input is practically zeros // if ( D_fx[0] < EPSILON_FX ) - condition = LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ); - if ( condition != 0 ) + + IF ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) { return; } @@ -3647,8 +3647,8 @@ static void eig2x2_fx( { tmp1 = L_shr( tmp1, q_diff ); } - condition = LT_32( tmp2, tmp1 ); - if ( condition != 0 ) + + IF ( LT_32( tmp2, tmp1 ) ) { return; } -- GitLab From 733ab359bd007e22a9c61896226053851946f0d1 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 19 Nov 2025 14:13:00 +0000 Subject: [PATCH 17/19] Apply clang format. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index b6686bde1..2509129ac 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3497,7 +3497,7 @@ static Word32 eig2x2_div_fx( Word32 num, Word32 den ); static Word32 eig2x2_div_fx( Word32 num, Word32 den ) { - IF ( EQ_32( den, 0x40000000 ) ) + IF( EQ_32( den, 0x40000000 ) ) { return num; } @@ -3636,7 +3636,7 @@ static void eig2x2_fx( // Numeric case, when input is practically zeros // if ( D_fx[0] < EPSILON_FX ) - IF ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) + IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) { return; } @@ -3648,7 +3648,7 @@ static void eig2x2_fx( tmp1 = L_shr( tmp1, q_diff ); } - IF ( LT_32( tmp2, tmp1 ) ) + IF( LT_32( tmp2, tmp1 ) ) { return; } -- GitLab From 5fbfa2c385b38f4a181a8810c913a0f58818b9e2 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 2 Dec 2025 08:14:44 +0000 Subject: [PATCH 18/19] Address Thomas Dettbarn's comment. --- .../ivas_dirac_dec_binaural_functions_fx.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 31ccdd9cd..c07c573ea 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3507,12 +3507,6 @@ static void eig2x2_fx( set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - Ure_fx[0][0] = ONE_IN_Q30; - move32(); - Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); c_re = L_shl( Cre_fx, exp ); @@ -3612,6 +3606,12 @@ static void eig2x2_fx( IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) { + Ure_fx[0][0] = ONE_IN_Q30; + move32(); + Ure_fx[1][1] = ONE_IN_Q30; + move32(); + *q_U = Q30; + move16(); return; } @@ -3624,6 +3624,12 @@ static void eig2x2_fx( IF( LT_32( tmp2, tmp1 ) ) { + Ure_fx[0][0] = ONE_IN_Q30; + move32(); + Ure_fx[1][1] = ONE_IN_Q30; + move32(); + *q_U = Q30; + move16(); return; } -- GitLab From 28d31761171291f3343d223a8c47fa6a92440642 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 3 Dec 2025 08:41:47 +0000 Subject: [PATCH 19/19] Apply clang format. --- lib_com/options.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 4509d48e3..f60dbd576 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -110,12 +110,7 @@ #define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */ #define OPT_2185_MATRIX_OUT_SCALING /* Dolby: Issue 2185, optimize matrix-mul output-format. */ #define OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ - -/* #################### End BASOP optimization switches ############################ */ - -/* #################### Start BASOP optimization switches ############################ */ - -#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ +#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ /* #################### End BASOP optimization switches ############################ */ -- GitLab