diff --git a/lib_com/options.h b/lib_com/options.h index 2c70dd14f900bcf3ae42a391332364aa639d8451..2a0bf02997ab0db51ad438871dfb58c85907ac84 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -77,6 +77,10 @@ /* ################### Start MAINTENANCE switches ########################### */ #define FIX_2255_ISAR_RENDER_POSES /* VA: issue 2255: fix missing check in isar_render_poses() */ +/* ################### Start BE switches ################################# */ +/* only BE switches wrt wrt. TS 26.251 V3.0 */ + + /* ################### Start BE switches ################################# */ /* only BE switches wrt wrt. TS 26.251 V3.0 */ @@ -122,19 +126,16 @@ /* ##################### End NON-BE switches ########################### */ + /* ################## End MAINTENANCE switches ######################### */ /* clang-format on */ /* #################### Start BASOP optimization switches ############################ */ -#define OPT_2181_MATRIX_TRANSP_1_MUL /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */ -#define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */ -#define OPT_2185_MATRIX_OUT_SCALING /* Dolby: Issue 2185, optimize matrix-mul output-format. */ -#define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ -#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ -#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ - +#define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ +#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ +#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */ /* #################### End BASOP optimization switches ############################ */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0152bf465a270b99520b097404e72c8b69770fcb..d4e6a6b2828cf49cd7bb66c9f2ad7870321c8d7c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -51,17 +51,19 @@ static Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define STEREO_PREPROCESS_IIR_FACTOR_Q15 ( 29491 ) // 0.9f in Q15 -/* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */ -#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736 // 0.4f in Q30 -#define TWO_POINT_FIVE_IN_Q13 20480 // Q13 -#define ADAPT_HTPROTO_IIR_FAC_FX 26689 // Q15 -#define LOG_10_BASE_2_Q29 1783446528 // Q29 -#define TAN_30_FX 17157 // Q15 -#define INV_TAN30_FX 28377 // Q14 -#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ + /* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */ +#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736 // 0.4f in Q30 +#define TWO_POINT_FIVE_IN_Q13 20480 // Q13 +#define ADAPT_HTPROTO_IIR_FAC_FX 26689 // Q15 +#define LOG_10_BASE_2_Q29 1783446528 // Q29 +#define TAN_30_FX 17157 // Q15 +#define INV_TAN30_FX 28377 // Q14 +#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) #define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */ #define ONE_DIV_EPSILON_EXP ( 40 ) +#define TEN_THOUSAND_Q17 1310720000 /* 10000.0 in Q17 */ +#define ONE_DIV_100000000_Q57 1441147784 /* 1.0 / (10000.0 * 10000.0) in Q57 */ #define SMALL_EIGENVALUE 50 @@ -117,17 +119,11 @@ static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaE static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ ); -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A ); -#endif static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#else -static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#endif /*------------------------------------------------------------------------- @@ -1198,8 +1194,8 @@ static void ivas_dirac_dec_binaural_internal_fx( move16(); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11 - scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) ); + Scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11 + Scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) ); st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); } @@ -2235,24 +2231,16 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */ -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( Mre_fx, Mim_fx, &q_M ); matrixScale_fx( CxRe_fx, CxIm_fx, &q_Cx ); -#endif + matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); -#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); matrixTransp2Mul_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, resultMtxRe_fx, resultMtxIm_fx, &q_res ); -#else - matrixTransp2Mul_fx( - tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, - 1 /*int Ascale*/, - 0 /*int Bscale*/, - resultMtxRe_fx, resultMtxIm_fx, &q_res ); -#endif /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent @@ -3495,345 +3483,214 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } -#ifdef NONBE_OPT_2193_EIG2X2 -static Word32 eig2x2_div_fx( Word32 num, Word32 den ); - -static Word32 eig2x2_div_fx( Word32 num, Word32 den ) +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static Word32 Isqrt_newton_iter( Word32 val, Word32 isqrt_val, Word16 q_val, Word16 exp ) { - IF( EQ_32( den, 0x40000000 ) ) - { - return num; - } - return div_w_newton( num, den ); + Word32 temp; + + /* newton iteration for y = 1 / sqrt(x) + * y = y * 2 * (0.75 - 0.25 * x * y * y) */ + + temp = Mpy_32_32( isqrt_val, isqrt_val ); + temp = Mpy_32_32( val, temp ); + exp = sub( Q31, sub( q_val, add( exp, exp ) ) ); + + temp = L_sub( 0x60000000, L_shr( temp, sub( 2, exp ) ) ); + temp = Mpy_32_32( isqrt_val, temp ); + if ( val ) + isqrt_val = L_shl( temp, 1 ); + + return ( isqrt_val ); } -#endif static void eig2x2_fx( - const Word32 E1_fx, /*q_E*/ - const Word32 E2_fx, /*q_E*/ + Word32 E1_fx, /*q_E*/ + Word32 E2_fx, /*q_E*/ Word16 q_E, - const Word32 Cre_fx, /*q_C*/ - const Word32 Cim_fx, /*q_C*/ + Word32 Cre_fx, /*q_C*/ + Word32 Cim_fx, /*q_C*/ Word16 q_C, - Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ - Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ Word16 *q_U, Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { -#ifdef NONBE_OPT_2193_EIG2X2 - Word32 pm_fx, add_fx; - Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; - Word32 s0_fx, s1_fx, nval0_fx, nval1_fx; - Word64 crossSquare_fx, tmp3, tmp4; - Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; - Word16 nval0_q, nval1_q; - Word32 i01, i00, i11, i10; - Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; - Word16 eps_q = 63 - EPSILON_EXP; - move32(); - move16(); + Word16 ch; + Word32 _D_fx[BINAURAL_CHANNELS]; + Word32 add_fx, sub_fx, pm_fx, isqrt_fx, s_fx, s1_fx, s2_fx, normVal_fx, normVal_isqrt_fx; + Word64 crossSquare_fx, a_fx; + Word16 q_E2, q_C2, q_a, q_min, exp, q_pm, q_s, q_s2, q_norm; + Word32 i0, i1; set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); - c_re = L_shl( Cre_fx, exp ); - c_im = L_shl( Cim_fx, exp ); - q_c = add( q_C, exp ); + exp = get_min_scalefactor( E1_fx, E2_fx ); + E1_fx = L_shl( E1_fx, exp ); + E2_fx = L_shl( E2_fx, exp ); + q_E = add( q_E, exp ); + q_E2 = add( q_E, q_E ); - exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); - e1 = L_shl( E1_fx, exp ); - e2 = L_shl( E2_fx, exp ); - q_e = add( q_E, exp ); + exp = get_min_scalefactor( Cre_fx, Cim_fx ); + Cre_fx = L_shl( Cre_fx, exp ); + Cim_fx = L_shl( Cim_fx, exp ); + q_C = add( q_C, exp ); + q_C2 = add( q_C, q_C ); - // crossSquare_fx = (c_re * c_re) + (c_im * c_im) - // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx - // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - // add_fx = 0.5f * (e1 + e2) + add_fx = W_extract_l( W_shr( W_add( W_deposit32_l( E1_fx ), W_deposit32_l( E2_fx ) ), 1 ) ); /* 0.5 * ( E1 + E2 ) */ + sub_fx = L_sub( E1_fx, E2_fx ); - tmp1 = L_sub( e1, e2 ); - tmp3 = W_mult_32_32( tmp1, tmp1 ); - q_tmp1 = add( add( q_e, q_e ), 1 ); - if ( !tmp3 ) - { - q_tmp1 = 63; - move16(); - } + /* + * crossSquare = (Cre * Cre) + (Cim * Cim) + */ + crossSquare_fx = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); /* 2 * q_C */ - crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im ); - q_crossSquare = add( add( q_c, q_c ), 1 ); - if ( !crossSquare_fx ) - { - q_crossSquare = 63; - move16(); - } + /* + * a = (E1 + E2) * (E1 + E2) - 4.0f * ((E1 * E2) - crossSquare) + * a = (E1 - E2) ^ 2 + 4 * crossSquare + */ - tmp4 = crossSquare_fx; - move64(); - q_tmp2 = sub( q_crossSquare, 2 ); - if ( !tmp4 ) - { - q_tmp2 = 63; - move16(); - } + a_fx = W_mult0_32_32( sub_fx, sub_fx ); /* 2 * q_E */ - q_diff = sub( q_tmp1, q_tmp2 ); - q_tmp1 = s_min( q_tmp1, q_tmp2 ); - if ( q_diff > 0 ) - { - tmp3 = W_shr( tmp3, q_diff ); - } - if ( q_diff < 0 ) + q_C2 = add( q_C, q_C ); + q_a = sub( s_min( q_E2, q_C2 ), 2 ); + a_fx = W_add( W_shr( a_fx, sub( q_E2, q_a ) ), W_shr( crossSquare_fx, sub( q_C2, add( q_a, 2 ) ) ) ); + + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + q_a = add( q_a, exp ); + pm_fx = W_extract_h( a_fx ); + pm_fx = L_max( 0, pm_fx ); + q_pm = sub( q_a, Q32 ); + pm_fx = L_max( pm_fx, 0 ); + if ( !pm_fx ) { - tmp4 = W_shl( tmp4, q_diff ); + q_pm = Q31; + move16(); } - tmp3 = W_add( tmp3, tmp4 ); - q_diff = W_norm( tmp3 ); - tmp3 = W_shl( tmp3, q_diff ); - q_tmp1 = add( q_tmp1, q_diff ); - - // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - exp = sub( 63, q_tmp1 ); - pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp ); - pm_fx = L_shr( pm_fx, 1 ); - q_tmp2 = sub( 31, exp ); - - // add_fx = 0.5 * (e1 + e2) - add_fx = L_shr( L_add( e1, e2 ), 1 ); - q_tmp1 = q_e; - move16(); - // D[0] = add + pm; - // D[1] = max( 0.0f, add - pm ); + /* + * add = 0.5f * ( E1 + E2 ) + * pm = 0.5f * sqrtf( max( 0.0f, a ) ) + * add = 0.5f * ( E1 + E2 ) + * D[0] = add + pm + * D[1] = max( 0.0f, add - pm ) + */ - q_diff = sub( q_tmp1, q_tmp2 ); + /* pm_fx is normalized */ + exp = sub( Q31, q_pm ); + isqrt_fx = Isqrt_lc1( pm_fx, &exp ); + isqrt_fx = Isqrt_newton_iter( pm_fx, isqrt_fx, q_pm, exp ); - tmp1 = add_fx; - move32(); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } + pm_fx = Mpy_32_32( pm_fx, isqrt_fx ); + q_pm = sub( q_pm, exp ); + exp = norm_l( pm_fx ); + pm_fx = L_shl( pm_fx, exp ); + q_pm = add( q_pm, exp ); + q_pm = add( q_pm, 1 ); /* 0.5 * sqrt( pm ) */ - tmp2 = pm_fx; - move32(); - if ( q_diff < 0 ) - { - tmp2 = L_shl( tmp2, q_diff ); - } + /* normalize */ + q_min = s_min( q_pm, q_E ); + q_min = sub( q_min, 1 ); + add_fx = L_shr( add_fx, sub( q_E, q_min ) ); + pm_fx = L_shr( pm_fx, sub( q_pm, q_min ) ); - D_fx[0] = L_add( tmp1, tmp2 ); - move32(); - D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); - move32(); - *q_D = s_min( q_tmp1, q_tmp2 ); - move32(); + D_fx[0] = L_add_sat( add_fx, pm_fx ); + D_fx[1] = L_max( 0, L_sub( add_fx, pm_fx ) ); - // Numeric case, when input is practically zeros - // if ( D_fx[0] < EPSILON_FX ) + *q_D = q_min; + move16(); - IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) + *q_U = Q30; + move16(); + /* Numeric case, when input is practically zeros */ + IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q30; - move32(); Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); return; } - // Numeric case, when input is near an identity matrix with a gain - tmp1 = Mpy_32_32( INV_1000_Q31, add_fx ); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } - - IF( LT_32( tmp2, tmp1 ) ) + /* Numeric case, when input is near an identity matrix with a gain */ + IF( LT_32( pm_fx, Mpy_32_32( INV_1000_Q31, add_fx ) ) ) { Ure_fx[0][0] = ONE_IN_Q30; - move32(); Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); return; } - // Eigenvectors - - q_diff = sub( q_e, *q_D ); - q_tmp1 = s_min( q_e, *q_D ); - - tmp1 = D_fx[0]; - move32(); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } - - tmp2 = D_fx[1]; - move32(); - if ( q_diff > 0 ) - { - tmp2 = L_shr( tmp2, q_diff ); - } + /* Eigenvectors */ + q_s = s_min( *q_D, q_E ); + q_s2 = add( q_s, q_s ); - if ( q_diff < 0 ) - { - e1 = L_shl( e1, q_diff ); - } + _D_fx[0] = L_shr( D_fx[0], sub( *q_D, q_s ) ); + _D_fx[1] = L_shr( D_fx[1], sub( *q_D, q_s ) ); + E1_fx = L_shr( E1_fx, sub( q_E, q_s ) ); + E2_fx = L_shr( E2_fx, sub( q_E, q_s ) ); - if ( q_diff < 0 ) + FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - e2 = L_shl( e2, q_diff ); - } - - s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1 - tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 - s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1 - tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 + s1_fx = L_sub( _D_fx[ch], E1_fx ); + s2_fx = L_sub( _D_fx[ch], E2_fx ); - i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) - i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) - - if ( i01 ) - { - s0_fx = tmp1; - move32(); - } + i0 = LE_32( L_abs( s2_fx ), L_abs( s1_fx ) ); + i1 = L_xor( i0, 1 ); - if ( i11 ) - { - s1_fx = tmp2; + s_fx = s1_fx; move32(); - } - - // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); - - q_tmp2 = shl( q_tmp1, 1 ); - q_min = s_min( q_tmp2, q_crossSquare ); - q_min = s_min( q_min, eps_q ); - - q_diff = sub( q_tmp2, q_min ); - tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff ); - tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff ); - - q_diff = sub( q_crossSquare, q_min ); - crossSquare_fx = W_shr( crossSquare_fx, q_diff ); - tmp3 = W_add( tmp3, crossSquare_fx ); - tmp4 = W_add( tmp4, crossSquare_fx ); - - q_diff = sub( eps_q, q_min ); - eps_fx = W_shr( eps_fx, q_diff ); - tmp3 = W_add( tmp3, eps_fx ); - tmp4 = W_add( tmp4, eps_fx ); - - q_diff = W_norm( tmp3 ); - tmp3 = W_shl( tmp3, q_diff ); - nval0_q = add( q_min, q_diff ); - - q_diff = W_norm( tmp4 ); - tmp4 = W_shl( tmp4, q_diff ); - nval1_q = add( q_min, q_diff ); - - // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp ); - // exp = sub( exp, sub( 62, nval0_q ) ); - // - // is equivalent to: - // - // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) ); - // exp = sub( nval0_q, 61 ); - - nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) ); - exp = sub( nval0_q, 61 ); - nval0_fx = Sqrt32( nval0_fx, &exp ); - nval0_q = sub( 31, exp ); - - // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp ); - // exp = sub( exp, sub( 62, nval1_q ) ); - // - // is equivalent to: - // - // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) ); - // exp = sub( nval1_q, 61 ); - - nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) ); - exp = sub( nval1_q, 61 ); - nval1_fx = Sqrt32( nval1_fx, &exp ); - nval1_q = sub( 31, exp ); - - q_diff = sub( q_c, q_tmp1 ); - q_tmp1 = s_min( q_tmp1, q_c ); - - if ( q_diff > 0 ) - { - c_re = L_shr( c_re, q_diff ); - } - - if ( q_diff > 0 ) - { - c_im = L_shr( c_im, q_diff ); - } - - if ( q_diff < 0 ) - { - s0_fx = L_shl( s0_fx, q_diff ); - } - - if ( q_diff < 0 ) - { - s1_fx = L_shl( s1_fx, q_diff ); - } + if ( !i0 ) + { + s_fx = s2_fx; + move32(); + } - q_diff = sub( nval0_q, nval1_q ); - q_tmp2 = s_min( nval0_q, nval1_q ); + a_fx = W_mult0_32_32( s_fx, s_fx ); /* 2*q_s */ + q_min = s_min( q_s2, q_C2 ); + a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); + a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ - if ( q_diff > 0 ) - { - nval0_fx = L_shr( nval0_fx, q_diff ); - } + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + normVal_fx = W_extract_h( a_fx ); + q_norm = sub( add( q_min, exp ), Q31 + 1 ); - if ( q_diff < 0 ) - { - nval1_fx = L_shl( nval1_fx, q_diff ); - } + /* normVal_fx is normalized */ + exp = sub( Q31, q_norm ); + normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); + normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); - *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); + q_norm = add( Q31, exp ); - i00 = L_sub( 1, i01 ); - i10 = L_sub( 1, i11 ); + a_fx = W_mult0_32_32( s_fx, normVal_isqrt_fx ); /* q_s + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ + Ure_fx[i0][ch] = W_extract_h( a_fx ); /* Q30 */ - c0_im = c_im; - move32(); - if ( i00 > 0 ) - { - c0_im = L_negate( c0_im ); - } + a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Ure_fx[i1][ch] = W_extract_h( a_fx ); /* Q30 */ - c1_im = c_im; - move32(); - if ( i10 > 0 ) - { - c1_im = L_negate( c1_im ); + a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + if ( i0 ) + a_fx = W_neg( a_fx ); + Uim_fx[i1][ch] = W_extract_h( a_fx ); /* Q30 */ } - - Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx ); - move32(); - Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx ); - move32(); - Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx ); - move32(); - - Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx ); - move32(); - Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx ); - move32(); - Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx ); - move32(); +} #else +static void eig2x2_fx( + const Word32 E1_fx, /*q_E*/ + const Word32 E2_fx, /*q_E*/ + Word16 q_E, + const Word32 Cre_fx, /*q_C*/ + const Word32 Cim_fx, /*q_C*/ + Word16 q_C, + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word16 *q_U, + Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ + Word16 *q_D ) +{ Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -3872,15 +3729,15 @@ static void eig2x2_fx( q_e = add( q_E, exp ); /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) - a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx - pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - add_fx = 0.5f * (e1 + e2)*/ + a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx + pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) + add_fx = 0.5f * (e1 + e2)*/ IF( L_and( c_re == 0, c_im == 0 ) ) { /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 - a_fx = (E1 - E2)^2 - pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ + a_fx = (E1 - E2)^2 + pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ crossSquare_fx = 0; move32(); q_crossSquare = Q31; @@ -3896,7 +3753,7 @@ static void eig2x2_fx( IF( EQ_32( e1, e2 ) ) { /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx - pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ + pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); test(); IF( EQ_32( c_re, 0 ) || LT_32( L_abs( c_re ), ONE_IN_Q15 ) ) @@ -3923,8 +3780,8 @@ static void eig2x2_fx( ELSE { /* if e1, e2 >> c_re, c_im then (e1 - e2)^2 ~ (e1 - e2)^2 + 4 * crossSquare_fx - a_fx = (E1 - E2)^2 - pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ + a_fx = (E1 - E2)^2 + pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ IF( GT_16( sub( q_c, q_e ), Q15 ) ) { @@ -4200,9 +4057,9 @@ static void eig2x2_fx( *q_U = q_U_2; move16(); } -#endif return; } +#endif static void matrixDiagMul_fx( Word32 reIn_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_In*/ @@ -4216,9 +4073,7 @@ static void matrixDiagMul_fx( { Word16 chA, chB; -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4228,35 +4083,22 @@ static void matrixDiagMul_fx( imOut_fx[chA][chB] = Mpy_32_32( imIn_fx[chA][chB], D_fx[chB] ); move32(); move32(); -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, reOut_fx[chA][chB] ); not_zero = L_or( not_zero, imOut_fx[chA][chB] ); -#endif } } *q_Out = sub( add( q_In, q_D ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_Out = Q31; move16(); } -#else - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - if ( L_and( is_zero_arr( reOut_fx[0], size ), is_zero_arr( imOut_fx[0], size ) ) ) - { - *q_Out = Q31; - move16(); - } -#endif - return; } -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4270,7 +4112,6 @@ static void matrixScale_fx( *q_A = add( *q_A, shift ); move16(); } -#endif static void matrixMul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4283,29 +4124,8 @@ static void matrixMul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { -#ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; -#else - Word16 chA, chB; - Word16 min_q_shift1, min_q_shift2; - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - - min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); - min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); - - scale_sig32( Are_fx[0], size, min_q_shift1 ); - scale_sig32( Aim_fx[0], size, min_q_shift1 ); - scale_sig32( Bre_fx[0], size, min_q_shift2 ); - scale_sig32( Bim_fx[0], size, min_q_shift2 ); - - *q_A = add( *q_A, min_q_shift1 ); - *q_B = add( *q_B, min_q_shift2 ); - move16(); - move16(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4332,32 +4152,18 @@ static void matrixMul_fx( Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else -#ifdef OPT_2182_MATRIX_SCALE_OPS - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#endif - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif - return; } @@ -4376,25 +4182,16 @@ static void matrixTransp1Mul_fx( Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL Word64 tmp64; Word16 common_lsh, q; q = add( add( q_A, q_B ), 1 ); common_lsh = sub( 63, q ); move16(); -#else - Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word64 tmp64_1, tmp64_2; - Word16 tmp16, q_common = 63; - move16(); -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] ); @@ -4408,93 +4205,32 @@ static void matrixTransp1Mul_fx( tmp_outIm_fx[chA][chB] = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) ); -#else - tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) - move64(); - tmp16 = W_norm( tmp_outRe_fx[chA][chB] ); - tmp_outRe_fx[chA][chB] = W_shl( tmp_outRe_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) ) - move64(); - q_tmp_outRe_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); - move16(); - q_common = s_min( q_tmp_outRe_fx[chA][chB], q_common ); - - - tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), Are_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] ), Aim_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp_outIm_fx[chA][chB] = W_sub( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) - move64(); - tmp16 = W_norm( tmp_outIm_fx[chA][chB] ); - tmp_outIm_fx[chA][chB] = W_shl( tmp_outIm_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) ) - move64(); - q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); - move16(); - q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common ); -#endif } } -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); -#else - outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) ) ); - move32(); - outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) ) ); - move32(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL *q_out = sub( add( q, common_lsh ), 32 ); move16(); -#else - *q_out = sub( q_common, 32 ); - move16(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif return; } -#ifdef OPT_2182_MATRIX_SCALE_OPS -static void matrixTransp2Mul_fx( - Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ - Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ - Word16 *q_A, - Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ - Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ - Word16 *q_B, - Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ - Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ - Word16 *q_out ) -#else static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4502,42 +4238,13 @@ static void matrixTransp2Mul_fx( Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ Word16 *q_B, - Word32 Ascale, - Word32 Bscale, Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) -#endif { -#ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#else - Word16 chA, chB; - Word16 min_q_shift; - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; - - IF( Ascale == 1 ) - { - min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); - scale_sig32( Are_fx[0], size, min_q_shift ); - scale_sig32( Aim_fx[0], size, min_q_shift ); - *q_A = add( *q_A, min_q_shift ); - move16(); - } - - IF( Bscale == 1 ) - { - min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); - scale_sig32( Bre_fx[0], size, min_q_shift ); - scale_sig32( Bim_fx[0], size, min_q_shift ); - *q_B = add( *q_B, min_q_shift ); - move16(); - } -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) @@ -4563,36 +4270,555 @@ static void matrixTransp2Mul_fx( Are_fx[chA][1], Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else -#ifdef OPT_2182_MATRIX_SCALE_OPS - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#endif - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif return; } +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static void chol2x2_fx( + Word32 E1, /*q_E*/ + Word32 E2, /*q_E*/ + Word16 q_E, + Word32 Cre, /*q_C*/ + Word32 Cim, /*q_C*/ + Word16 q_C, + Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */ + Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */ + Word16 *q_out ) +{ + Word32 E_tmp, sqrtVal, iSqrtVal; + Word16 q_sqrtVal, q_min; + Word16 exp = 0; + Word16 q_out_0_0, q_out_1_0, q_out_1_1; + Word32 i0, i1; + + set32_fx( (Word32 *) outRe, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + set32_fx( (Word32 *) outIm, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + + exp = get_min_scalefactor( E1, E2 ); + E1 = L_shl( E1, exp ); + E2 = L_shl( E2, exp ); + q_E = add( q_E, exp ); + + exp = get_min_scalefactor( Cre, Cim ); + Cre = L_shl( Cre, exp ); + Cim = L_shl( Cim, exp ); + q_C = add( q_C, exp ); + + i0 = LE_32( E1, E2 ); + i1 = L_xor( i0, 1 ); + + /* Perform Cholesky decomposition according to louder channel first */ + E_tmp = E1; + move32(); + + if ( i0 ) + { + E1 = E2; + move32(); + } + if ( i0 ) + { + E2 = E_tmp; + move32(); + } + + /* outRe[0][0] = sqrt(E1) */ + exp = sub( Q31, q_E ); + iSqrtVal = Isqrt_lc1( E1, &exp ); + iSqrtVal = Isqrt_newton_iter( E1, iSqrtVal, q_E, exp ); + outRe[i0][i0] = Mpy_32_32( E1, iSqrtVal ); + q_out_0_0 = sub( q_E, exp ); + + /* outRe[1][0] = Cre / (outRe[0][0]);*/ + q_out_1_0 = Q31; + move16(); + outRe[i1][i0] = Mpy_32_32( Cre, iSqrtVal ); + if ( Cre != 0 ) + q_out_1_0 = sub( q_C, exp ); + + /* outIm[1][0] = Cim / (outRe[0][0]); */ + move16(); + outIm[i1][i0] = Mpy_32_32( Cim, iSqrtVal ); + if ( i0 ) + outIm[i1][i0] = L_negate( outIm[i1][i0] ); + if ( Cim != 0 ) + q_out_1_0 = sub( q_C, exp ); + + /* outRe[i1][i1] = sqrt(max(0.0f, E2 - (Cre * Cre + Cim * Cim) / (E1))) */ + sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); + + /* Special treatment for E1_norm = 0x40000000, Result is known: 2 * sqrtVal */ + IF( GT_32( E1, 0x40000000 ) ) + { + sqrtVal = div_w_newton( sqrtVal, E1 ); + } + q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E1_norm - 2 */ + q_min = s_min( q_E, q_sqrtVal ); + sqrtVal = L_sub( L_shl( E2, sub( q_min, q_E ) ), L_shl( sqrtVal, sub( q_min, q_sqrtVal ) ) ); + + sqrtVal = L_max( 0, sqrtVal ); + + if ( sqrtVal == 0 ) + { + q_min = Q31; + move16(); + } + + /* normalize sqrtVal */ + exp = norm_l( sqrtVal ); + sqrtVal = L_shl( sqrtVal, exp ); + q_min = add( q_min, exp ); + exp = sub( Q31, q_min ); + iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); + + outRe[i1][i1] = Mpy_32_32( sqrtVal, iSqrtVal ); + q_out_1_1 = sub( q_min, exp ); + + /* normalize q_out */ + *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_1_0 ); + *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ + move16(); + outRe[i1][i1] = L_shr( outRe[i1][i1], sub( q_out_1_1, *q_out ) ); + move32(); + outRe[i1][i0] = L_shr( outRe[i1][i0], sub( q_out_1_0, *q_out ) ); + move32(); + outIm[i1][i0] = L_shr( outIm[i1][i0], sub( q_out_1_0, *q_out ) ); + move32(); + outRe[i0][i0] = L_shr( outRe[i0][i0], sub( q_out_0_0, *q_out ) ); + move32(); + return; +} + +static void formulate2x2MixingMatrix_fx( + Word32 Ein1_fx, /*q_Ein*/ + Word32 Ein2_fx, /*q_Ein*/ + Word16 q_Ein, + Word32 CinRe_fx, /*q_Cin*/ + Word32 CinIm_fx, /*q_Cin*/ + Word16 q_Cin, + Word32 Eout1_fx, /*q_Eout*/ + Word32 Eout2_fx, /*q_Eout*/ + Word16 q_Eout, + Word32 CoutRe_fx, /*q_Cout*/ + Word32 CoutIm_fx, /*q_Cout*/ + Word16 q_Cout, + Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* Q31 */ + Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */ + Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */ + Word16 *q_M, + const Word16 regularizationFactor_fx /* Q14 */ ) +{ + /* + This function implements a 2x2 solution for an optimized spatial audio rendering algorithm + Vilkamo, J., Bäckström, T. and Kuntz, A., 2013. + "Optimized covariance domain framework for time–frequency processing of spatial audio." + Journal of the Audio Engineering Society, 61(6), pp.403-411. + + The result of the formulas below are the same as those in the publication, however, some + derivation details differ for as simple as possible 2x2 formulattion + */ + Word16 chA, chB; + Word32 maxEneIn_fx, maxEneOut_fx, maxEne_fx, maxEneDiv_fx; + Word16 q_maxEne, q_maxEneDiv, exp; + Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Uxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Sx_fx[BINAURAL_CHANNELS], Sx_Inv_fx[BINAURAL_CHANNELS], Kxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Kxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 D_fx[BINAURAL_CHANNELS], D_fx_norm; + Word32 div_fx[BINAURAL_CHANNELS]; + Word32 Ghat_fx[BINAURAL_CHANNELS]; + Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 q_ky, q_Sx, q_Ux, q_Kx, q_A, q_U, q_D, q_P, q_Sx_Inv, q_Sx_0, q_Sx_1, q_Sx_Inv_0, q_Sx_Inv_1; + Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im, Cin_re, Cin_im; + Word16 q_ein, q_eout, q_cin, q_cout, q_Ghat, q_Ghat0, q_Ghat1, q_GhatQ, q_temp, q_div_min, q_div[2], norm_div[2]; + Word32 denom; + Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 i_regularizationFactor_fx; + Flag doRegularization; + set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + q_ky = 0; + move16(); + q_Sx = 0; + move16(); + q_Ux = 0; + move16(); + q_Kx = 0; + move16(); + + q_ein = q_Ein; + move16(); + q_eout = q_Eout; + move16(); + q_cin = q_Cin; + move16(); + q_cout = q_Cout; + move16(); + + /* Normalize energy values */ + maxEneIn_fx = L_max( Ein1_fx, Ein2_fx ); + move16(); + maxEneOut_fx = L_max( Eout1_fx, Eout2_fx ); + move16(); + + IF( maxEneIn_fx == 0 || maxEneOut_fx == 0 ) + { + Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0; + move32(); + move32(); + move32(); + move32(); + Mim_fx[0][0] = Mim_fx[0][1] = Mim_fx[1][0] = Mim_fx[1][1] = 0; + move32(); + move32(); + move32(); + move32(); + *q_M = Q31; + move16(); + return; + } + + IF( LT_16( q_eout, q_ein ) ) + { + maxEne_fx = L_max( maxEneOut_fx, L_shr( maxEneIn_fx, sub( q_ein, q_eout ) ) ); + q_maxEne = q_eout; + move16(); + } + ELSE + { + maxEne_fx = L_max( maxEneIn_fx, L_shr( maxEneOut_fx, sub( q_eout, q_ein ) ) ); + q_maxEne = q_ein; + move16(); + } + + exp = norm_l( maxEne_fx ); + maxEne_fx = L_shl( maxEne_fx, exp ); + + maxEneDiv_fx = ONE_IN_Q31; + move32(); + + /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ + IF( GT_32( maxEne_fx, 0x40000000 ) ) + { + maxEneDiv_fx = div_w_newton( ONE_IN_Q31, maxEne_fx ); + } + + q_maxEneDiv = sub( sub( Q30, exp ), q_maxEne ); + + E_in1 = Mpy_32_32( Ein1_fx, maxEneDiv_fx ); + E_in2 = Mpy_32_32( Ein2_fx, maxEneDiv_fx ); + q_ein = add( q_ein, q_maxEneDiv ); + + Cin_re = Mpy_32_32( CinRe_fx, maxEneDiv_fx ); + Cin_im = Mpy_32_32( CinIm_fx, maxEneDiv_fx ); + q_cin = add( q_cin, q_maxEneDiv ); + + E_out1 = Mpy_32_32( Eout1_fx, maxEneDiv_fx ); + E_out2 = Mpy_32_32( Eout2_fx, maxEneDiv_fx ); + q_eout = add( q_eout, q_maxEneDiv ); + + Cout_re = Mpy_32_32( CoutRe_fx, maxEneDiv_fx ); + Cout_im = Mpy_32_32( CoutIm_fx, maxEneDiv_fx ); + q_cout = add( q_cout, q_maxEneDiv ); + + /* Cholesky decomposition of target / output covariance matrix */ + chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + /* Eigendecomposition of input covariance matrix */ + eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); + /* Eigendecomposition to Kx -- Ux Sx Ux' -> Kx Kx'*/ + + /* calculate sqrt(Sx_fx[0]) and 1.0 / sqrt(Sx_fx[0]) */ + exp = Q31; + move16(); + if ( Sx_fx[0] ) + exp = norm_l( Sx_fx[0] ); + Sx_fx[0] = L_shl( Sx_fx[0], exp ); + q_Sx_0 = add( q_Sx, exp ); + exp = sub( Q31, q_Sx_0 ); + Sx_Inv_fx[0] = Isqrt_lc1( Sx_fx[0], &exp ); + Sx_Inv_fx[0] = Isqrt_newton_iter( Sx_fx[0], Sx_Inv_fx[0], q_Sx_0, exp ); + q_Sx_Inv_0 = sub( Q31, exp ); + Sx_fx[0] = Mpy_32_32( Sx_fx[0], Sx_Inv_fx[0] ); + q_Sx_0 = sub( q_Sx_0, exp ); + + /* calculate sqrt(Sx_fx[1]) and 1.0 / sqrt(Sx_fx[1]) */ + exp = Q31; + move16(); + if ( Sx_fx[1] ) + exp = norm_l( Sx_fx[1] ); + Sx_fx[1] = L_shl( Sx_fx[1], exp ); + q_Sx_1 = add( q_Sx, exp ); + exp = sub( Q31, q_Sx_1 ); + Sx_Inv_fx[1] = Isqrt_lc1( Sx_fx[1], &exp ); + Sx_Inv_fx[1] = Isqrt_newton_iter( Sx_fx[1], Sx_Inv_fx[1], q_Sx_1, exp ); + q_Sx_Inv_1 = sub( Q31, exp ); + Sx_fx[1] = Mpy_32_32( Sx_fx[1], Sx_Inv_fx[1] ); + q_Sx_1 = sub( q_Sx_1, exp ); + + /* normalization */ + q_Sx = s_min( q_Sx_0, q_Sx_1 ); + Sx_fx[0] = L_shr( Sx_fx[0], q_Sx_0 - q_Sx ); + Sx_fx[1] = L_shr( Sx_fx[1], q_Sx_1 - q_Sx ); + + matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx ); + + /* Regularize the diagonal Sx for matrix inversion + Sx_fx[0] = max( Sx_fx[0], regularizationFactor * Sx_fx[1] ) + Sx_fx[1] = max( Sx_fx[1], regularizationFactor * Sx_fx[0] ) + + Reuse calculated Sx_Inv_fx[0] and Sx_Inv_fx[1] in conjunction with + i_regularizationFactor to avoid unnecessary division later by + Sx_fx[0] and Sx_fx[1] to calculate nearest orthonormal matrix P + + This is equivalent to + Sx_Inv_fx[0] = min( Sx_Inv_fx[0], i_regularizationFactor * Sx_Inv_fx[1] ) + Sx_Inv_fx[1] = min( Sx_Inv_fx[1], i_regularizationFactor * Sx_Inv_fx[0] ) + */ + + /* Note: since there are only a few regularization factors, can be replaced by table lookup */ + i_regularizationFactor_fx = L_shl( ONE_IN_Q30 / regularizationFactor_fx, Q15 - Q3 ); /* Q28 */ + + doRegularization = LT_32( Sx_fx[0], L_shl_sat( Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ), 1 ) ); + if ( doRegularization ) + { + Sx_Inv_fx[0] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[1] ); + move32(); + } + if ( doRegularization ) + { + q_Sx_Inv_0 = q_Sx_Inv_1; + move16(); + } + if ( doRegularization ) + q_Sx_Inv_0 = sub( q_Sx_Inv_0, Q3 ); + + doRegularization = LT_32( Sx_fx[1], L_shl_sat( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); + if ( doRegularization ) + { + Sx_Inv_fx[1] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[0] ); + move32(); + } + if ( doRegularization ) + { + q_Sx_Inv_1 = q_Sx_Inv_0; + move16(); + } + if ( doRegularization ) + q_Sx_Inv_1 = sub( q_Sx_Inv_1, Q3 ); + + /* normalization */ + exp = norm_l( Sx_Inv_fx[0] ); + Sx_Inv_fx[0] = L_shl( Sx_Inv_fx[0], exp ); + q_Sx_Inv_0 = add( q_Sx_Inv_0, exp ); + + exp = norm_l( Sx_Inv_fx[1] ); + Sx_Inv_fx[1] = L_shl( Sx_Inv_fx[1], exp ); + q_Sx_Inv_1 = add( q_Sx_Inv_1, exp ); + + q_Sx_Inv = s_min( q_Sx_Inv_0, q_Sx_Inv_1 ); + Sx_Inv_fx[0] = L_shr( Sx_Inv_fx[0], q_Sx_Inv_0 - q_Sx_Inv ); + Sx_Inv_fx[1] = L_shr( Sx_Inv_fx[1], q_Sx_Inv_1 - q_Sx_Inv ); + + /* This is equivalent to the prototype signal energy normalization in the publication + * Ghat[0] = sqrt(Eout1 / max(Ein1, 0.001f * Ein2)) + * Ghat[1] = sqrt(Eout2 / max(Ein2, 0.001f * Ein1)) */ + denom = L_max( E_in1, Mpy_32_32( E_in2, INV_1000_Q31 ) ); + exp = norm_l( denom ); + denom = L_shl( denom, exp ); + + Ghat_fx[0] = E_out1; + move32(); + + /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out1 */ + IF( GT_32( denom, 0x40000000 ) ) + { + Ghat_fx[0] = div_w_newton( E_out1, denom ); + } + exp = add( add( exp, 1 ), sub( q_ein, q_eout ) ); + Ghat_fx[0] = Sqrt32( Ghat_fx[0], &exp ); + q_Ghat0 = sub( Q31, exp ); + + denom = L_max( E_in2, Mpy_32_32( E_in1, INV_1000_Q31 ) ); + exp = norm_l( denom ); + denom = L_shl( denom, exp ); + + Ghat_fx[1] = E_out2; + move32(); + + /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out2 */ + IF( GT_32( denom, 0x40000000 ) ) + { + Ghat_fx[1] = div_w_newton( E_out2, denom ); + } + exp = add( add( exp, 1 ), sub( q_ein, q_eout ) ); + Ghat_fx[1] = Sqrt32( Ghat_fx[1], &exp ); + q_Ghat1 = sub( Q31, exp ); + + /* normalize q_Ghat */ + q_Ghat = s_min( q_Ghat0, q_Ghat1 ); + + Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_Ghat0, q_Ghat ) ); + move32(); + Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_Ghat1, q_Ghat ) ); + move32(); + + /* Matrix multiplication, tmp = Ky' * G_hat * Q */ + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] ); + GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] ); + move32(); + move32(); + } + q_GhatQ = sub( add( Q31, q_Ghat ), 31 ); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] ); + tmpIm_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] ); + move32(); + move32(); + } + } + + q_temp = sub( add( q_ky, q_GhatQ ), 31 ); + matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); + + /* A = Ky' * G_hat * Q * Kx (see publication) */ + matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, + Kxre_fx, Kxim_fx, &q_Kx, /* Kxre_fx, Kxim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Are_fx, Aim_fx, &q_A ); + + /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx + For matrix A that is P = A(A'A)^0.5 */ + matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + + eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + + div_fx[0] = TEN_THOUSAND_Q17; + move32(); + q_div[0] = Q17; + move16(); + + exp = norm_l( D_fx[0] ); + D_fx_norm = L_shl( D_fx[0], exp ); + exp = add( q_D, exp ); + + /* check if D_fx[0] > 1.0 / (10000.0 * 10000.0) */ + IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) ) + { + exp = sub( 31, exp ); + div_fx[0] = Isqrt_lc1( D_fx_norm, &exp ); + q_div[0] = sub( Q31, exp ); + } + + div_fx[1] = TEN_THOUSAND_Q17; + move32(); + q_div[1] = Q17; + move16(); + + exp = norm_l( D_fx[1] ); + D_fx_norm = L_shl( D_fx[1], exp ); + exp = add( q_D, exp ); + + /* check if D_fx[1] > 1.0 / (10000.0 * 10000.0) */ + IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) ) + { + exp = sub( Q31, exp ); + div_fx[1] = Isqrt_lc1( D_fx_norm, &exp ); + q_div[1] = sub( Q31, exp ); + } + + matrixMul_fx( Are_fx, Aim_fx, &q_A, + Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + tmpRe_fx, tmpIm_fx, &q_temp ); + + norm_div[0] = Q31; + move16(); + norm_div[1] = Q31; + move16(); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = Mpy_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); + exp = norm_l( tmpRe_fx[chA][chB] ); + if ( tmpRe_fx[chA][chB] ) + norm_div[chB] = s_min( norm_div[chB], exp ); + + tmpIm_fx[chA][chB] = Mpy_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); + exp = norm_l( tmpIm_fx[chA][chB] ); + if ( tmpIm_fx[chA][chB] ) + norm_div[chB] = s_min( norm_div[chB], exp ); + } + } + + /* normalize */ + q_div_min = s_min( add( q_div[0], norm_div[0] ), add( q_div[1], norm_div[1] ) ); + q_div_min = sub( q_div_min, 1 ); /* 1 bit headroom */ + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], q_div[chB] - q_div_min ); + tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], q_div[chB] - q_div_min ); + } + } + q_temp = sub( add( q_temp, q_div_min ), Q31 ); + + matrixTransp2Mul_fx( + tmpRe_fx, tmpIm_fx, &q_temp, /* tmpRe_fx, tmpIm_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + + /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ + + for ( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + for ( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], Sx_Inv_fx[chB] ); + Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], Sx_Inv_fx[chB] ); + } + } + q_P = sub( q_P, sub( Q31, q_Sx_Inv ) ); + + matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, /* KyRe_fx, KyIm_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Pre_fx, Pim_fx, &q_P, /* Pre_fx, Pim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + tmpRe_fx, tmpIm_fx, &q_temp ); + + matrixTransp2Mul_fx( + tmpRe_fx, tmpIm_fx, &q_temp, + Uxre_fx, Uxim_fx, &q_Ux, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Mre_fx, Mim_fx, q_M ); + return; +} + +#else static void chol2x2_fx( const Word32 E1, /*q_E*/ const Word32 E2, /*q_E*/ @@ -4837,6 +5063,7 @@ static void chol2x2_fx( return; } + static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx, /*q_Ein*/ Word32 Ein2_fx, /*q_Ein*/ @@ -5090,10 +5317,9 @@ static void formulate2x2MixingMatrix_fx( q_temp = sub( add( q_ky, q_GhatQ ), 31 ); /* A = Ky' * G_hat * Q * Kx (see publication) */ -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); matrixScale_fx( Kxre_fx, Kxim_fx, &q_Kx ); -#endif + matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx @@ -5151,10 +5377,9 @@ static void formulate2x2MixingMatrix_fx( div_fx[0] = L_min( div_fx[0], thresh ); // q_div div_fx[1] = L_min( div_fx[1], thresh ); // q_div -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( Are_fx, Aim_fx, &q_A ); matrixScale_fx( Ure_fx, Uim_fx, &q_U ); -#endif + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); @@ -5218,17 +5443,10 @@ static void formulate2x2MixingMatrix_fx( } } -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, - 0 /*int Ascale*/, - 0 /*int Bscale*/, - Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#endif /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) @@ -5359,27 +5577,18 @@ static void formulate2x2MixingMatrix_fx( } } -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( KyRe_fx, KyIm_fx, &q_ky ); matrixScale_fx( Pre_fx, Pim_fx, &q_P ); -#endif matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M ); -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, - 1 /*int Ascale*/, - 0 /*int Bscale*/, - Mre_fx, Mim_fx, q_M ); -#endif return; } - +#endif static void getDirectPartGains_fx( const Word16 bin, @@ -5645,41 +5854,40 @@ static void hrtfShGetHrtf_fx( *------------------------------------------------------------------------*/ /*! r: Configured reqularization factor value */ - Word16 configure_reqularization_factor_fx( const IVAS_FORMAT ivas_format, /* i : IVAS format */ const Word32 ivas_total_brate /* i : IVAS total bitrate */ ) { Word16 reqularizationFactor; - reqularizationFactor = 16384; /* Default value */ /*Q14*/ + reqularizationFactor = 16384; /* Default value 1.0 Q14 */ move16(); IF( EQ_32( ivas_format, MASA_FORMAT ) ) { IF( GE_32( ivas_total_brate, IVAS_160k ) ) { - reqularizationFactor = 6553; /*Q14*/ + reqularizationFactor = 6553; /* 0.4 Q14 */ move16(); } ELSE IF( EQ_32( ivas_total_brate, IVAS_128k ) ) { - reqularizationFactor = 8192; /*Q14*/ + reqularizationFactor = 8192; /*0.5 Q14 */ move16(); } ELSE IF( EQ_32( ivas_total_brate, IVAS_96k ) ) { - reqularizationFactor = 9830; /*Q14*/ + reqularizationFactor = 9830; /* 0.6 *Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) ) { - reqularizationFactor = 13107; /*Q14*/ + reqularizationFactor = 13107; /* 0.8 Q14 */ move16(); } ELSE { - reqularizationFactor = 16384; /*Q14*/ + reqularizationFactor = 16384; /* 1.0 Q14 */ move16(); } } @@ -5688,27 +5896,27 @@ Word16 configure_reqularization_factor_fx( { IF( GE_32( ivas_total_brate, IVAS_96k ) ) { - reqularizationFactor = 6553; /*Q14*/ + reqularizationFactor = 6553; /* 0.4 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_80k ) ) { - reqularizationFactor = 8192; /*Q14*/ + reqularizationFactor = 8192; /*0.5 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) ) { - reqularizationFactor = 11468; /*Q14*/ + reqularizationFactor = 11468; /* 0.7 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_48k ) ) { - reqularizationFactor = 13107; /*Q14*/ + reqularizationFactor = 13107; /* 0.8 Q14 */ move16(); } ELSE { - reqularizationFactor = 16384; /*Q14*/ + reqularizationFactor = 16384; /* 1.0 Q14 */ move16(); } }