From 772490d57cfe07396224396a111bcabfcc3f4bf0 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 15 Dec 2024 22:24:43 +0100 Subject: [PATCH 1/3] optimize ivas_dirac_dec_binaural_functions.c --- lib_com/options.h | 2 + lib_rend/ivas_dirac_dec_binaural_functions.c | 269 ++++++++++++++++++- 2 files changed, 267 insertions(+), 4 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 2a218e87d..5b6855dbd 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -118,4 +118,6 @@ #define FIX_ISSUE_1156 /* Ittiam: Fix for Issue 1156: Encoder crash for Stereo at 32kbps in SWB_BWE_encoding_ivas_fx() */ #define FIX_DISCLAIMER /* VA: Add disclaimer for external renderer + Add info about IVAS reference version (FLP issue 1225) */ #define FIX_ISSUE_1167 /* Ittiam: Fix for Issue 1167: Encoder crash for OSBA ISM3SBA1 at 13.2 and 16.4 kbps in gauss_L2_ivas_fx() */ +#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */ +#define FIX_1113_OPT_DIRAC_BIN_REND /* FhG: Various optimizations to ivas_dirac_dec_binaural_functions.c */ #endif diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 204ba1996..433da6188 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -31,9 +31,9 @@ *******************************************************************************************************/ #include +#include "options.h" #include #include -#include "options.h" #include "prot.h" #include "prot_fx.h" #include "ivas_prot.h" @@ -72,9 +72,13 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define LOG_10_BASE_2_Q29 1783446528 // Q29 #define TAN_30_FX 17157 // Q15 #define INV_TAN30_FX 28377 // Q14 -#define EPSILON_MANT 1180591621 /* 1e-12 in Q70 */ +#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) -#define ADAPT_HTPROTO_ROT_LIM_1 0.8f +#ifdef 
FIX_1113_OPT_DIRAC_BIN_REND +#define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */ +#define ONE_DIV_EPSILON_EXP ( 40 ) +#endif +#define ADAPT_HTPROTO_ROT_LIM_1 0.8f #define MAX_GAIN_CACHE_SIZE ( ( MASA_MAXIMUM_DIRECTIONS * 3 ) + MAX_NUM_OBJECTS ) /* == different calls to get gains */ @@ -3278,10 +3282,17 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); +#if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); +#else + /* Note: This code part does not work yet, see pipeline issue for BASOP #1009 */ + /* although the same code works at other places: mantissa and q_format is fine */ + normVal_fx = ISqrt32( tmp3, &exp ); + q_tmp2 = sub( 31, exp ); +#endif IF( LT_16( q_tmp1, q_c ) ) { @@ -3347,10 +3358,15 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); +#else + normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); + q_tmp2 = sub( 31, exp_tmp3 ); +#endif IF( LT_16( q_tmp1, q_c ) ) { @@ -3405,9 +3421,13 @@ static void eig2x2_fx( } IF( q_U_1 != 0 ) - *q_U = q_U_1; + { + *q_U = q_U_1; + } ELSE + { *q_U = q_U_2; + } move16(); return; @@ -3463,8 +3483,10 @@ static void matrixMul_fx( Word16 chA, chB; Word16 min_q_shift1, min_q_shift2; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); +#ifndef FIX_1113_OPT_DIRAC_BIN_REND #ifndef IVAS_ENH64_CADENCE_CHANGES Word32 tmp1, tmp2; +#endif #endif min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); @@ -3493,6 +3515,18 @@ static void matrixMul_fx( move32(); 
outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) ); move32(); +#else +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ), + Are_fx[chA][1], Bre_fx[1][chB] ), + Aim_fx[chA][0], Bim_fx[0][chB] ), + Aim_fx[chA][1], Bim_fx[1][chB] ); + move32(); + outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ), + Aim_fx[chA][1], Bre_fx[1][chB] ), + Are_fx[chA][0], Bim_fx[0][chB] ), + Are_fx[chA][1], Bim_fx[1][chB] ); + move32(); #else test(); test(); @@ -3592,6 +3626,7 @@ static void matrixMul_fx( } outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); move32(); +#endif #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } } @@ -3620,12 +3655,26 @@ static void matrixTransp1Mul_fx( { Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); +#ifndef FIX_1113_OPT_DIRAC_BIN_REND Word32 tmp1, tmp2; +#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), + Are_fx[1][chA], Bre_fx[1][chB] ), + Aim_fx[0][chA], Bim_fx[0][chB] ), + Aim_fx[1][chA], Bim_fx[1][chB] ); + move32(); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), + Are_fx[1][chA], Bim_fx[1][chB] ), + Aim_fx[0][chA], Bre_fx[0][chB] ), + Aim_fx[1][chA], Bre_fx[1][chB] ); + move32(); +#else test(); test(); test(); @@ -3692,6 +3741,7 @@ static void matrixTransp1Mul_fx( tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) ); outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); move32(); +#endif } } *q_out = sub( add( q_A, q_B ), 31 ); @@ -3720,8 +3770,10 @@ static void 
matrixTransp2Mul_fx( Word16 chA, chB; Word16 min_q_shift; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); +#ifndef FIX_1113_OPT_DIRAC_BIN_REND #ifndef IVAS_ENH64_CADENCE_CHANGES Word32 tmp1, tmp2; +#endif #endif min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); @@ -3748,6 +3800,18 @@ static void matrixTransp2Mul_fx( move32(); outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) ); move32(); +#else +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ), + Are_fx[chA][1], Bre_fx[chB][1] ), + Aim_fx[chA][0], Bim_fx[chB][0] ), + Aim_fx[chA][1], Bim_fx[chB][1] ); + move32(); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ), + Aim_fx[chA][1], Bre_fx[chB][1] ), + Are_fx[chA][0], Bim_fx[chB][0] ), + Are_fx[chA][1], Bim_fx[chB][1] ); + move32(); #else test(); test(); @@ -3816,6 +3880,7 @@ static void matrixTransp2Mul_fx( tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) ); outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); move32(); +#endif #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } } @@ -3890,6 +3955,7 @@ static void chol2x2_fx( } ELSE { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp ); move32(); q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); @@ -3897,6 +3963,33 @@ static void chol2x2_fx( outIm[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_im, outRe[0][0], &exp ); move32(); q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); +#else + Word32 denom; + Word16 den_exp; + Word32 my_outRe, my_outIm; + + /* Compute denom = 1.0 / outRe[0][0] */ + denom = ISqrt32( outRe[0][0], &exp ); + denom = Mpy_32_32( denom, denom ); + den_exp = 
shl( exp, 1 ); + + /* Normalise c_re, c_im */ + exp = norm_l( c_re ); + my_outRe = L_shl( c_re, exp ); + q_re2 = add( q_c, exp ); + exp = norm_l( c_im ); + my_outIm = L_shl( c_im, exp ); + q_im = add( q_c, exp ); + + /* Multiply and store c_re*denom and c_im*denom */ + outRe[1][0] = Mpy_32_32( denom, my_outRe ); + move32(); + q_re2 = sub( q_re2, den_exp ); + + outIm[1][0] = Mpy_32_32( denom, my_outIm ); + move32(); + q_im = sub( q_im, den_exp ); +#endif } if ( outRe[1][0] == 0 ) { @@ -3915,8 +4008,16 @@ static void chol2x2_fx( // 4611686 = Q62 IF( e1 == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); +#else + Word16 norm = norm_l( temp ); + temp = L_shl( temp, norm ); + q_tmp = add( q_tmp, norm ); + temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); + q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); +#endif } ELSE { @@ -4146,8 +4247,15 @@ static void formulate2x2MixingMatrix_fx( // 4611686 = Q62 IF( maxEne_fx == 0 ) { +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + maxEneDiv_fx = ONE_DIV_EPSILON_MANT; + move32(); + q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; + move16(); +#else maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62 q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); +#endif } ELSE { @@ -4207,8 +4315,24 @@ static void formulate2x2MixingMatrix_fx( IF( temp == 0 ) { +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + IF( E_out1 == 0 ) + { + Ghat_fx[0] = 0; + exp = -19; + move32(); + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 + exp = sub( exp, sub( q_eout, 62 ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + } +#else BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 exp = sub( exp, sub( q_eout, 62 ) ); +#endif } ELSE { @@ -4216,16 +4340,36 @@ static void formulate2x2MixingMatrix_fx( temp = 
BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp +#endif } +#ifndef FIX_1113_OPT_DIRAC_BIN_REND Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp +#endif move32(); temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31 temp = L_max( temp, E_in2 ); // q_ein IF( temp == 0 ) { +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + IF( E_out2 == 0 ) + { /* We can set hard-coded results */ + Ghat_fx[1] = 0; + exp1 = -19; + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 + exp1 = sub( exp1, sub( q_eout, 62 ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + } +#else BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 exp1 = sub( exp1, sub( q_eout, 62 ) ); +#endif } ELSE { @@ -4233,8 +4377,13 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 +#endif } +#ifndef FIX_1113_OPT_DIRAC_BIN_REND Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 +#endif move32(); q_Ghat = sub( 31, s_max( exp, exp1 ) ); @@ -4283,8 +4432,13 @@ static void formulate2x2MixingMatrix_fx( IF( D_fx[0] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 exp = sub( exp, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp = ONE_DIV_EPSILON_EXP; +#endif } ELSE { @@ -4296,8 +4450,13 @@ static void formulate2x2MixingMatrix_fx( IF( D_fx[1] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62 exp1 = sub( 
exp1, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp1 = ONE_DIV_EPSILON_EXP; +#endif } ELSE { @@ -4400,25 +4559,61 @@ static void formulate2x2MixingMatrix_fx( matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ +#if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 ) { +#ifdef FIX_1113_OPT_DIRAC_BIN_REND + Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + + + Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pim[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pim[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); +#else Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 q_Pre[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 q_Pim[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); +#endif } ELSE { +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC + Word16 Pre_shift, Pim_shift; +#endif temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], temp, &exp ); q_Pre[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp ); q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); +#else + temp = 
BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + Pre_shift = norm_l( Pre_fx[chA][chB] ); + Pim_shift = norm_l( Pim_fx[chA][chB] ); + Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp ); + Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp ); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); + q_Pre[chA][chB] = add( q_temp, Pre_shift ); + q_Pim[chA][chB] = add( q_temp, Pim_shift ); +#endif + } + if ( Pre_fx[chA][chB] == 0 ) + { + q_Pre[chA][chB] = 31; + move16(); + } + if ( Pim_fx[chA][chB] == 0 ) + { + q_Pim[chA][chB] = 31; + move16(); } move32(); move32(); @@ -4426,6 +4621,72 @@ static void formulate2x2MixingMatrix_fx( move16(); } } +#else + /* BINAURAL_CHANNEL == 2 */ + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + IF( Sx_fx[chB] == 0 ) + { + Pre_fx[0][chB] = Mpy_32_32( Pre_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[0][chB] = Mpy_32_32( Pim_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pre_fx[1][chB] = Mpy_32_32( Pre_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[1][chB] = Mpy_32_32( Pim_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + } + ELSE + { + Word16 Pre_shift, Pim_shift; + temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); + + Pre_shift = norm_l( Pre_fx[0][chB] ); + Pim_shift = norm_l( Pim_fx[0][chB] ); + Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp ); + Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp ); + q_Pre[0][chB] = add( q_temp, Pre_shift ); + q_Pim[0][chB] = add( q_temp, Pim_shift ); + + Pre_shift = norm_l( Pre_fx[1][chB] ); + 
Pim_shift = norm_l( Pim_fx[1][chB] ); + Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp ); + Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], Pim_shift ), temp ); + q_Pre[1][chB] = add( q_temp, Pre_shift ); + q_Pim[1][chB] = add( q_temp, Pim_shift ); + } + if ( Pre_fx[0][chB] == 0 ) + { + q_Pre[0][chB] = 31; + move16(); + } + if ( Pim_fx[0][chB] == 0 ) + { + q_Pim[0][chB] = 31; + move16(); + } + if ( Pre_fx[1][chB] == 0 ) + { + q_Pre[1][chB] = 31; + move16(); + } + if ( Pim_fx[1][chB] == 0 ) + { + q_Pim[1][chB] = 31; + move16(); + } + move32(); + move32(); + move16(); + move16(); + move32(); + move32(); + move16(); + move16(); + } +#endif minimum_s( q_Pre[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); q_P = s_min( q_P, exp ); minimum_s( q_Pim[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); -- GitLab From 0d0272942031a842d4bbf1920549b55beb176f59 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 15 Dec 2024 22:28:39 +0100 Subject: [PATCH 2/3] defines --- lib_rend/ivas_dirac_dec_binaural_functions.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 433da6188..644e84398 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -74,7 +74,7 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define INV_TAN30_FX 28377 // Q14 #define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC #define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */ #define ONE_DIV_EPSILON_EXP ( 40 ) #endif @@ -4247,7 +4247,7 @@ static void formulate2x2MixingMatrix_fx( // 4611686 = Q62 IF( maxEne_fx == 0 ) { -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC maxEneDiv_fx = ONE_DIV_EPSILON_MANT; 
move32(); q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; @@ -4315,7 +4315,7 @@ static void formulate2x2MixingMatrix_fx( IF( temp == 0 ) { -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC IF( E_out1 == 0 ) { Ghat_fx[0] = 0; @@ -4340,11 +4340,11 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } -#ifndef FIX_1113_OPT_DIRAC_BIN_REND +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif move32(); @@ -4353,7 +4353,7 @@ static void formulate2x2MixingMatrix_fx( temp = L_max( temp, E_in2 ); // q_ein IF( temp == 0 ) { -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC IF( E_out2 == 0 ) { /* We can set hard-coded results */ Ghat_fx[1] = 0; @@ -4377,11 +4377,11 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } -#ifndef FIX_1113_OPT_DIRAC_BIN_REND +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif move32(); @@ -4566,7 +4566,7 @@ static void formulate2x2MixingMatrix_fx( { IF( Sx_fx[chB] == 0 ) { -#ifdef FIX_1113_OPT_DIRAC_BIN_REND +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT ); // q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); -- GitLab From ab28b55264d593f9059e8fa3ae571121daf57294 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Mon, 13 Jan 2025 15:55:12 +0100 
Subject: [PATCH 3/3] revert problematic ISqrt32() call, and stick to default code --- lib_rend/ivas_dirac_dec_binaural_functions.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 644e84398..29d337dc6 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -3358,12 +3358,14 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); -#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC +#if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); #else + /* Note: This code part does not work yet, see pipeline issue for BASOP #1009 */ + /* although the same code works at other places: mantissa and q_format is fine */ normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); q_tmp2 = sub( 31, exp_tmp3 ); #endif -- GitLab