From 94da89c87fe916801ea4f5610543340eb51ee95d Mon Sep 17 00:00:00 2001 From: mave2802 <59919483+mave2802@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:31:44 +0100 Subject: [PATCH 1/5] added dedicated version of formulate2x2MixingMatrix() for the case if cross terms are zero --- lib_com/options.h | 59 ++- .../ivas_dirac_dec_binaural_functions_fx.c | 363 +++++++++++++++++- 2 files changed, 414 insertions(+), 8 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 2c70dd14f..7107acf04 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -80,9 +80,62 @@ /* ################### Start BE switches ################################# */ /* only BE switches wrt wrt. TS 26.251 V3.0 */ -/*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ -#define FIX_1990_SANITIZER_IN_REVERB_LOAD /* Nokia: Fix issue part of issue 1990 by introducing missing free of structure - keep until #2059 is addressed */ -#define FIX_1999_TEMPORARY_DISABLE_DIST_ATT_CHECK /* Eri: Issue 1999: Range check on float values of distance attenuation, while the float values are not propagated to this function. The test is not correct, but configurable distance attenuation is not used in Characterization.*/ +/* ################### Start FIXES switches ########################### */ +/*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ +#define REMOVE_BASOP_Util_Divide3232_Scale_cadence /* remove this division variant */ +#define FIX_1990_SANITIZER_IN_REVERB_LOAD /* Nokia: Fix issue part of issue 1990 by introducing missing free of structure - keep until #2059 is addressed */ +#define FIX_1999_TEMPORARY_DISABLE_DIST_ATT_CHECK /* Eri: Issue 1999: Range check on float values of distance attenuation, while the float values are not propagated to this function. The test is not correct, but configurable distance attenuation is not used in Characterization.*/ +#define TEMP_FIX_2088_MSAN_INIT_ERROR /* Eri: Temporary fix for Issue 2088 - MSAN error. Will come with later port of JBM+Split rendering update */ +#define FIX_2092_ASSERT_IN_OMASA_RENDER /* FhG, Nokia: Fix LTV crash due to overflow in OMASA EXT output */ +#define FIX_2084_FLOATING_POINT_LEFTOVERS /* FhG: convert floating-point leftovers in IVAS_ENC_FeedObjectMetadata() */ +#define FIX_2141_ASSERT_IN_OMASA_BITRATE_SWITSCHING /* FhG: Replace L_shl with L_shl_sat to prevent overflow when calculating scale factors for very small numbers in the logarithmic domain */ +#define FIX_APA_EXECS_SCALING /* VA: fix scaling of JBM APA buffer */ +#define FIX_2164_ASSERT_IN_OMASA_PREPROC_FOR_EDIT /* Nokia: Issue 2164: Prevent overflow when calculating equalization coefficient for editing before clamping to safe range */ +#define FIX_BASOP_ASSERT_IN_TONAL_MDCT_PLC /* FhG: fix for issue 2165 - using saturating addition in tonal MDCT PLC function */ +#define OPT_2146_BASOP_UTIL_ADD_MANT32EXP /* Dlb: optimized version of BASOP_Util_Add_Mant32Exp() */ +#define FIX_2166_ASSERT_OSBA_PLC_STEREO_OUT /* FhG: fix for issue 2166 - add missing averaging factor 0.5 in for the sum of energies in function stereo_dft_dmx_swb_nrg_fx()*/ +#define FIX_2086_ENABLE_HP20_OPT_FOR_ENC /* FhG: Enable hp20_fx_32_opt() for Encoder */ +#define FIX_1793_DEC_MC_TO_MONO_SCALING_ISSUE /* FhG: Use dynamic Q factor for synth_fx and synthFB_fx to prevent overflow */ +#define FIX_2170_ASSERT_IN_FFT3 /* Eri: Assert in fft3_fx from EVS, adding _sat */ +#define FIX_2082_FP_LEFTOVERS_OMASA_DEC /* Nokia: fix for issue 2082, cleaning remaining floating point code */ +#define FIX_2174_JBM_BASOP_ALIGNMENT /* VoiceAge, Nokia: Fixes to JBM BASOP implementation and alignment to float */ +#define FIX_GAIN_EDIT_LIMITS /* Harmonize gain edit limits for all opertation points. For all modes, limit to max +12dB. For parametric modes, limit to min -24dB. */ + +#define FIX_2176_ASSERT_DEC_MAP_PARAMS_DIRAC2STEREO /* FhG: Reduce hStereoDft->q_smooth_buf_fx by one to prevent overflow in the subframe_band_nrg[][] calculation */ +#define FIX_2015_PREMPH_SAT_ALT /* VA: saturation can happen during preemphasis filtering due to a too aggressive scaling factor, allows preemphis to get 1 more bit headroom */ +#define FIX_2178_FL_TO_FX_WITH_OBJ_EDIT_FILE_INTERFACE /* Nokia: Fixes float to fx conversion in decoder app with object edit file interface */ +#define FIX_2070_JBM_TC_CHANNEL_RESCALING_ISSUE /* Eri/Orange: scale_sig32 problem on p_tc_fx[] */ + +#define FIX_2173_UBSAN_IN_JBM_PCMDSP_APA /* FhG: Fix UBSAN problems in jbm_pcmdsp_apa_fx.c */ +#define FIX_1947_DEC_HIGH_MLD_FOR_STEREO2MONO /* FhG: Make Q-factor of synth_16_fx and output_16_fx dynamic to prevent overflow in HQ_CORE mode */ + +#define FIX_2184_EVS_STEREO_DMX_CHANNEL_DISAPPEARING /* Orange: Fix for issue 2184 - to prevent one channel from becoming inaudible in the mono downmix output */ +#define FIX_2148_OBJ_EDIT_ISSUE_WITH_OSBA /* Nokia: Add missing code to solve issue */ +#define FIX_2200_ISAR_PLC_CRASH /* Dolby: Fix for ISAR PLC crash observed with newly added BASOP tests */ +#define FIX_2210_ASSERT_IN_BW_DETEC_FX_FOR_OMASA /* FhG: Resolve overflow by swapping the order of addition and multiplication */ +#define FIX_2217_ASSERT_IN_IVAS_CORE_DECODER_WITH_MC /* FhG: Adjust Q_real to prevent overflow in st->cldfbSyn->cldfb_state_fx scaling */ +#define FIX_2211_ASSERT_IN_REND_CREND_CONVOLER /* FhG: Add headroom to p_output_fx to prevent overflow in ivas_rend_crendProcessSubframe_fx() */ + +#define NONBE_FIX_2205_SATURATE_ALTERNATIVE +#define NONBE_FIX_2206_SATURATE_ALTERNATIVE +#define FIX_2226_ISAR_PRE_CRASH_CLDFB_NO_CHANNELS /* Dolby: Fix crash of ISAR pre-renderer due to an attempt of re-scaling uninitialized values in the CLDFB filter bank */ +#define RTP_SR_CODEC_FRAME_SIZE_IN_TOC_BYTE /* Dolby: CR for split rendering codec framesize signalling in Toc Byte */ + + +#define NONBE_FIX_ISSUE_2232_CHECK_CLDFB_STATES /* FhG: Adjust scaleFactor according to st->cldfbSyn->cldfb_state_fx too to avoid overflow in cldfbSynthesis_ivas_fx() */ + +#define NONBE_FIX_BASOP_2233_RTPDUMP_DIFFERING_BITSTREAMS /* Nokia: fix basop issue 2233: Fix differing rtpdump streams */ + +#define NONBE_FIX_2237_ZERO_CURR_NOISE_PROBLEM /* FhG: Modify sum of hTonalMDCTConc->curr_noise_nrg to avoid inaccurate zero */ + +#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dlb: use dedicated formulate2x2MixingMatrix() function if cross terms are zero */ +/* ################### End FIXES switches ########################### */ + +/* #################### Start BASOP porting switches ############################ */ + +#define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */ +#define FIX_1053_REVERB_RECONFIGURATION +#define FIX_1119_SPLIT_RENDERING_VOIP /* FhG: Add split rendering support to decoder in VoIP mode */ #define TMP_1342_WORKAROUND_DEC_FLUSH_BROKEN_IN_SR /* FhG: Temporary workaround for incorrect implementation of decoder flush with split rendering */ #define NONBE_1122_KEEP_EVS_MODE_UNCHANGED /* FhG: Disables fix for issue 1122 in EVS mode to keep BE tests green. This switch should be removed once the 1122 fix is added to EVS via a CR. */ #define FIX_1435_MOVE_STEREO_PANNING /* VA: issue 1435: do the EVS stereo panning in the renderer */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0152bf465..53d7aac32 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -116,11 +116,9 @@ static void getDirectPartGains_fx( const Word16 bin, Word16 aziDeg, Word16 eleDe static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaExtRend, COMBINED_ORIENTATION_HANDLE hCombinedOrientationData, Word32 *output_fx[] /*Q11*/, const Word16 subframe, const SPLIT_REND_WRAPPER *hSplitRendWrapper, Word32 Cldfb_Out_Real[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_Out_Imag[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] ); static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ ); - -#ifdef OPT_2182_MATRIX_SCALE_OPS -static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A ); -#endif - +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16* q_M ); +#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); #ifdef OPT_2182_MATRIX_SCALE_OPS @@ -2355,12 +2353,22 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( CrCrossIm_fx = Mpy_32_32( CrCrossIm_fx, decorrelationReductionFactor_fx ); q_CrCross = sub( add( q_CrCross, q_decorrelationReductionFactor ), 31 ); +#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, 0, 0, /* Decorrelated signal has ideally no cross-terms */ Q31, CrEneL_fx, CrEneR_fx, q_CrEne, CrCrossRe_fx, CrCrossIm_fx, q_CrCross, prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14 +#else /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ + /* Determine a residual mixing matrix Mdec for processing the decorrelated signal to obtain + * the residual signal (that has the residual covariance matrix) + * Decorrelated signal has ideally no cross-terms */ + formulate2x2MixingMatrixNoCross_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, + CrEneL_fx, CrEneR_fx, q_CrEne, + CrCrossRe_fx, CrCrossIm_fx, q_CrCross, + prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec ); +#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ } ELSE { @@ -4837,6 +4845,351 @@ static void chol2x2_fx( return; } + +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static void formulate2x2MixingMatrixNoCross_fx( + Word32 Ein1_fx /*q_Ein*/, + Word32 Ein2_fx /*q_Ein*/, + Word16 q_Ein, + Word32 Eout1_fx /*q_Eout*/, + Word32 Eout2_fx /*q_Eout*/, + Word16 q_Eout, + Word32 CoutRe_fx /*q_Cout*/, + Word32 CoutIm_fx /*q_Cout*/, + Word16 q_Cout, + Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, + Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, + Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, + Word16 *q_M ) +{ + /* + This function implements a 2x2 solution for an optimized spatial audio rendering algorithm, based on + Vilkamo, J., Bäckström, T. and Kuntz, A., 2013. + "Optimized covariance domain framework for time–frequency processing of spatial audio." + Journal of the Audio Engineering Society, 61(6), pp.403-411. + but optimized for decorrelated signals + + The result of the formulas below are the same as those in the publication, however, some + derivation details differ for as simple as possible 2x2 formulation + */ + Word16 chA, chB; + Word32 maxEne_fx, tmp, maxEneDiv_fx; + Word16 q_maxEne, q_maxEneDiv, exp, exp1; + Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 D_fx[BINAURAL_CHANNELS]; + Word32 div_fx[BINAURAL_CHANNELS]; + Word32 Ghat_fx[BINAURAL_CHANNELS]; + Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 q_ky, q_A, q_U, q_D, q_P; + Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im; + Word16 q_ein, q_eout, q_cout, q_Ghat, q_GhatQ, q_temp, q_div, exp_temp; + Word32 temp; + Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + + q_ky = 0; + move16(); + + exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 ); + E_in1 = L_shl( Ein1_fx, exp ); + E_in2 = L_shl( Ein2_fx, exp ); + q_ein = add( q_Ein, exp ); + + exp = sub( get_min_scalefactor( Eout1_fx, Eout2_fx ), 1 ); + E_out1 = L_shl( Eout1_fx, exp ); + E_out2 = L_shl( Eout2_fx, exp ); + q_eout = add( q_Eout, exp ); + + exp = sub( get_min_scalefactor( CoutRe_fx, CoutIm_fx ), 1 ); + Cout_re = L_shl( CoutRe_fx, exp ); + Cout_im = L_shl( CoutIm_fx, exp ); + q_cout = add( q_Cout, exp ); + + /* Normalize energy values */ + maxEne_fx = L_max( E_in1, E_in2 ); + q_maxEne = q_ein; + move16(); + + tmp = L_max( E_out1, E_out2 ); + IF( LT_16( q_maxEne, q_eout ) ) + { + maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne + } + ELSE + { + maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne + q_maxEne = q_eout; + move16(); + } + + // 4611686 = Q62 + IF( maxEne_fx == 0 ) + { + maxEneDiv_fx = ONE_DIV_EPSILON_MANT; + move32(); + q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; + move16(); + } + ELSE + { + maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp ); + q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); + } + exp = norm_l( maxEneDiv_fx ); + maxEneDiv_fx = L_shl( maxEneDiv_fx, exp ); + q_maxEneDiv = add( q_maxEneDiv, exp ); + + E_in1 = Mpy_32_32( E_in1, maxEneDiv_fx ); + E_in2 = Mpy_32_32( E_in2, maxEneDiv_fx ); + q_ein = sub( add( q_ein, q_maxEneDiv ), 31 ); + + E_out1 = Mpy_32_32( E_out1, maxEneDiv_fx ); + E_out2 = Mpy_32_32( E_out2, maxEneDiv_fx ); + q_eout = sub( add( q_eout, q_maxEneDiv ), 31 ); + + Cout_re = Mpy_32_32( Cout_re, maxEneDiv_fx ); + Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx ); + q_cout = sub( add( q_cout, q_maxEneDiv ), 31 ); + + /* Cholesky decomposition of target / output covariance matrix */ + chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + + /* If there are no cross-terms, the Eigendecomposition of input covariance matrix + can be skipped. Uxre is a unit matrix, Uxim is a zero matrix and Sx is (1, 1) + Further on, also Kxre is a unit matrix and Kxim is a zero matrix + Multiplication with these matrices / scalars can be skipped + */ + + temp = Mpy_32_32( E_in2, INV_1000_Q31 ); + temp = L_max( temp, E_in1 ); + + IF( temp == 0 ) + { + IF( E_out1 == 0 ) + { + Ghat_fx[0] = 0; + exp = -19; + move32(); + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_newton( E_out1, 4611686, &exp ); // 4611686 = Q62 + exp = sub( exp, sub( q_eout, 62 ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + } + } + ELSE + { + temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + + temp = BASOP_Util_Divide3232_Scale_newton( E_out1, temp, &exp ); + exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + } + move32(); + + temp = Mpy_32_32( E_in1, 2147484 ); + temp = L_max( temp, E_in2 ); // q_ein + IF( temp == 0 ) + { + IF( E_out2 == 0 ) + { /* We can set hard-coded results */ + Ghat_fx[1] = 0; + exp1 = -19; + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_newton( E_out2, 4611686, &exp1 ); // 4611686 = Q62 + exp1 = sub( exp1, sub( q_eout, 62 ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + } + } + ELSE + { + temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + + temp = BASOP_Util_Divide3232_Scale_newton( E_out2, temp, &exp1 ); + exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + } + move32(); + + q_Ghat = sub( 31, s_max( exp, exp1 ) ); + Word16 q_diff = sub( 31, q_Ghat ); + Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_diff, exp ) ); // q_Ghat + move32(); + Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_diff, exp1 ) ); // q_Ghat + move32(); + + /* Matrix multiplication, A = Ky' * G_hat * Q */ + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] ); + GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] ); + move32(); + move32(); + } + q_GhatQ = sub( add( Q31, q_Ghat ), 31 ); + + exp = sub( s_min( L_norm_arr( KyRe_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ), L_norm_arr( KyIm_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ) ), 1 ); + scale_sig32( KyRe_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp ); + scale_sig32( KyIm_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp ); + q_ky = add( q_ky, exp ); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Are_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] ); + Aim_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] ); + move32(); + move32(); + } + } + + q_A = sub( add( q_ky, q_GhatQ ), 31 ); // TODO SMM: Check if this is correct!!! + move16(); + + /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx + For matrix A that is P = A(A'A)^0.5 */ + matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + + eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + + IF( D_fx[0] != 0 && D_fx[1] == 0 ) // Due to an eig2x2 error, sometimes D_fx[1] becomes zero, which implies that the input matrix should be singular (i.e., determinant = 0). + { + Word32 det_fx = L_sub_sat( Mult_32_32( tmpRe_fx[0][0], tmpRe_fx[1][1] ), + L_add_sat( Mult_32_32( tmpRe_fx[1][0], tmpRe_fx[1][0] ), + Mult_32_32( tmpIm_fx[1][0], tmpIm_fx[1][0] ) ) ); + if ( det_fx != 0 ) + { + D_fx[1] = SMALL_EIGENVALUE; // Setting D_fx[1] to epsilon has no effect, as the value is too small to affect the output. + move32(); + } + } + + IF( D_fx[0] == 0 ) + { + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + move32(); + exp = ONE_DIV_EPSILON_EXP; + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, D_fx[0], &exp ); + exp = sub( exp, sub( Q30, q_D ) ); + } + div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + move32(); + + // Sqrt(1) + div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 + exp1 = add( 0, 20 ); + + IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt + { + exp1 = sub( 31, q_D ); + div_fx[1] = ISqrt32( D_fx[1], &exp1 ); + move32(); + } + q_div = sub( 31, s_max( exp, exp1 ) ); + + div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div + move32(); + div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div + move32(); + + // 1310720000 = 10,000.0f in Q17 + Word32 thresh = L_shl_sat( 1310720000, sub( q_div, Q17 ) ); // q_div + div_fx[0] = L_min( div_fx[0], thresh ); // q_div + div_fx[1] = L_min( div_fx[1], thresh ); // q_div + + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); + + exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); + scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); + q_div = add( q_div, exp ); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Word64 W_tmp; + + W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); + IF( W_tmp != 0 ) + { + Word16 hdrm = sub( W_norm( W_tmp ), 32 ); + tmpRe_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm ); + move32(); + hdrm_re[chA][chB] = add( add( q_temp, q_div ), hdrm ); + move16(); + } + ELSE + { + tmpRe_fx[chA][chB] = 0; + move32(); + } + + W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); + IF( W_tmp != 0 ) + { + Word16 hdrm = sub( W_norm( W_tmp ), 32 ); + move16(); + tmpIm_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm ); + move32(); + hdrm_im[chA][chB] = add( add( q_temp, q_div ), hdrm ); + move16(); + } + ELSE + { + tmpIm_fx[chA][chB] = 0; + move32(); + } + } + } + + minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); + q_temp = exp; + move16(); + minimum_s( hdrm_im[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, &exp ); + q_temp = s_min( q_temp, exp ); + q_temp = sub( q_temp, 1 ); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], sub( hdrm_re[chA][chB], q_temp ) ); + tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], sub( hdrm_im[chA][chB], q_temp ) ); + move32(); + move32(); + } + } + + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, + 0 /*int Ascale*/, + 0 /*int Bscale*/, + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + + matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, Mre_fx, Mim_fx, q_M ); + + return; +} +#endif /* FORM2x2MIXMAT_IMPROVEMENT */ + + static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx, /*q_Ein*/ Word32 Ein2_fx, /*q_Ein*/ -- GitLab From 3b5b89fc2ca8309726c49f7956db2c8af0b13653 Mon Sep 17 00:00:00 2001 From: mave2802 <59919483+mave2802@users.noreply.github.com> Date: Wed, 10 Dec 2025 09:16:51 +0100 Subject: [PATCH 2/5] refactoring of formulate2x2MixingMatrix() --- lib_com/options.h | 66 +- .../ivas_dirac_dec_binaural_functions_fx.c | 1429 +++++++++++------ 2 files changed, 957 insertions(+), 538 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 7107acf04..20004a4f3 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -80,62 +80,13 @@ /* ################### Start BE switches ################################# */ /* only BE switches wrt wrt. TS 26.251 V3.0 */ -/* ################### Start FIXES switches ########################### */ -/*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ -#define REMOVE_BASOP_Util_Divide3232_Scale_cadence /* remove this division variant */ -#define FIX_1990_SANITIZER_IN_REVERB_LOAD /* Nokia: Fix issue part of issue 1990 by introducing missing free of structure - keep until #2059 is addressed */ -#define FIX_1999_TEMPORARY_DISABLE_DIST_ATT_CHECK /* Eri: Issue 1999: Range check on float values of distance attenuation, while the float values are not propagated to this function. The test is not correct, but configurable distance attenuation is not used in Characterization.*/ -#define TEMP_FIX_2088_MSAN_INIT_ERROR /* Eri: Temporary fix for Issue 2088 - MSAN error. Will come with later port of JBM+Split rendering update */ -#define FIX_2092_ASSERT_IN_OMASA_RENDER /* FhG, Nokia: Fix LTV crash due to overflow in OMASA EXT output */ -#define FIX_2084_FLOATING_POINT_LEFTOVERS /* FhG: convert floating-point leftovers in IVAS_ENC_FeedObjectMetadata() */ -#define FIX_2141_ASSERT_IN_OMASA_BITRATE_SWITSCHING /* FhG: Replace L_shl with L_shl_sat to prevent overflow when calculating scale factors for very small numbers in the logarithmic domain */ -#define FIX_APA_EXECS_SCALING /* VA: fix scaling of JBM APA buffer */ -#define FIX_2164_ASSERT_IN_OMASA_PREPROC_FOR_EDIT /* Nokia: Issue 2164: Prevent overflow when calculating equalization coefficient for editing before clamping to safe range */ -#define FIX_BASOP_ASSERT_IN_TONAL_MDCT_PLC /* FhG: fix for issue 2165 - using saturating addition in tonal MDCT PLC function */ -#define OPT_2146_BASOP_UTIL_ADD_MANT32EXP /* Dlb: optimized version of BASOP_Util_Add_Mant32Exp() */ -#define FIX_2166_ASSERT_OSBA_PLC_STEREO_OUT /* FhG: fix for issue 2166 - add missing averaging factor 0.5 in for the sum of energies in function stereo_dft_dmx_swb_nrg_fx()*/ -#define FIX_2086_ENABLE_HP20_OPT_FOR_ENC /* FhG: Enable hp20_fx_32_opt() for Encoder */ -#define FIX_1793_DEC_MC_TO_MONO_SCALING_ISSUE /* FhG: Use dynamic Q factor for synth_fx and synthFB_fx to prevent overflow */ -#define FIX_2170_ASSERT_IN_FFT3 /* Eri: Assert in fft3_fx from EVS, adding _sat */ -#define FIX_2082_FP_LEFTOVERS_OMASA_DEC /* Nokia: fix for issue 2082, cleaning remaining floating point code */ -#define FIX_2174_JBM_BASOP_ALIGNMENT /* VoiceAge, Nokia: Fixes to JBM BASOP implementation and alignment to float */ -#define FIX_GAIN_EDIT_LIMITS /* Harmonize gain edit limits for all opertation points. For all modes, limit to max +12dB. For parametric modes, limit to min -24dB. */ - -#define FIX_2176_ASSERT_DEC_MAP_PARAMS_DIRAC2STEREO /* FhG: Reduce hStereoDft->q_smooth_buf_fx by one to prevent overflow in the subframe_band_nrg[][] calculation */ -#define FIX_2015_PREMPH_SAT_ALT /* VA: saturation can happen during preemphasis filtering due to a too aggressive scaling factor, allows preemphis to get 1 more bit headroom */ -#define FIX_2178_FL_TO_FX_WITH_OBJ_EDIT_FILE_INTERFACE /* Nokia: Fixes float to fx conversion in decoder app with object edit file interface */ -#define FIX_2070_JBM_TC_CHANNEL_RESCALING_ISSUE /* Eri/Orange: scale_sig32 problem on p_tc_fx[] */ - -#define FIX_2173_UBSAN_IN_JBM_PCMDSP_APA /* FhG: Fix UBSAN problems in jbm_pcmdsp_apa_fx.c */ -#define FIX_1947_DEC_HIGH_MLD_FOR_STEREO2MONO /* FhG: Make Q-factor of synth_16_fx and output_16_fx dynamic to prevent overflow in HQ_CORE mode */ - -#define FIX_2184_EVS_STEREO_DMX_CHANNEL_DISAPPEARING /* Orange: Fix for issue 2184 - to prevent one channel from becoming inaudible in the mono downmix output */ -#define FIX_2148_OBJ_EDIT_ISSUE_WITH_OSBA /* Nokia: Add missing code to solve issue */ -#define FIX_2200_ISAR_PLC_CRASH /* Dolby: Fix for ISAR PLC crash observed with newly added BASOP tests */ -#define FIX_2210_ASSERT_IN_BW_DETEC_FX_FOR_OMASA /* FhG: Resolve overflow by swapping the order of addition and multiplication */ -#define FIX_2217_ASSERT_IN_IVAS_CORE_DECODER_WITH_MC /* FhG: Adjust Q_real to prevent overflow in st->cldfbSyn->cldfb_state_fx scaling */ -#define FIX_2211_ASSERT_IN_REND_CREND_CONVOLER /* FhG: Add headroom to p_output_fx to prevent overflow in ivas_rend_crendProcessSubframe_fx() */ - -#define NONBE_FIX_2205_SATURATE_ALTERNATIVE -#define NONBE_FIX_2206_SATURATE_ALTERNATIVE -#define FIX_2226_ISAR_PRE_CRASH_CLDFB_NO_CHANNELS /* Dolby: Fix crash of ISAR pre-renderer due to an attempt of re-scaling uninitialized values in the CLDFB filter bank */ -#define RTP_SR_CODEC_FRAME_SIZE_IN_TOC_BYTE /* Dolby: CR for split rendering codec framesize signalling in Toc Byte */ - - -#define NONBE_FIX_ISSUE_2232_CHECK_CLDFB_STATES /* FhG: Adjust scaleFactor according to st->cldfbSyn->cldfb_state_fx too to avoid overflow in cldfbSynthesis_ivas_fx() */ - -#define NONBE_FIX_BASOP_2233_RTPDUMP_DIFFERING_BITSTREAMS /* Nokia: fix basop issue 2233: Fix differing rtpdump streams */ - -#define NONBE_FIX_2237_ZERO_CURR_NOISE_PROBLEM /* FhG: Modify sum of hTonalMDCTConc->curr_noise_nrg to avoid inaccurate zero */ - -#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dlb: use dedicated formulate2x2MixingMatrix() function if cross terms are zero */ -/* ################### End FIXES switches ########################### */ - -/* #################### Start BASOP porting switches ############################ */ - -#define NONBE_1244_FIX_SWB_BWE_MEMORY /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */ -#define FIX_1053_REVERB_RECONFIGURATION -#define FIX_1119_SPLIT_RENDERING_VOIP /* FhG: Add split rendering support to decoder in VoIP mode */ + +/* ################### Start BE switches ################################# */ +/* only BE switches wrt wrt. TS 26.251 V3.0 */ + +/*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ +#define FIX_1990_SANITIZER_IN_REVERB_LOAD /* Nokia: Fix issue part of issue 1990 by introducing missing free of structure - keep until #2059 is addressed */ +#define FIX_1999_TEMPORARY_DISABLE_DIST_ATT_CHECK /* Eri: Issue 1999: Range check on float values of distance attenuation, while the float values are not propagated to this function. The test is not correct, but configurable distance attenuation is not used in Characterization.*/ #define TMP_1342_WORKAROUND_DEC_FLUSH_BROKEN_IN_SR /* FhG: Temporary workaround for incorrect implementation of decoder flush with split rendering */ #define NONBE_1122_KEEP_EVS_MODE_UNCHANGED /* FhG: Disables fix for issue 1122 in EVS mode to keep BE tests green. This switch should be removed once the 1122 fix is added to EVS via a CR. */ #define FIX_1435_MOVE_STEREO_PANNING /* VA: issue 1435: do the EVS stereo panning in the renderer */ @@ -175,6 +126,7 @@ /* ##################### End NON-BE switches ########################### */ + /* ################## End MAINTENANCE switches ######################### */ /* clang-format on */ @@ -187,7 +139,7 @@ #define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ #define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ #define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ - +#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */ /* #################### End BASOP optimization switches ############################ */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 53d7aac32..33eb96e4d 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -51,17 +51,19 @@ static Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define STEREO_PREPROCESS_IIR_FACTOR_Q15 ( 29491 ) // 0.9f in Q15 -/* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */ -#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736 // 0.4f in Q30 -#define TWO_POINT_FIVE_IN_Q13 20480 // Q13 -#define ADAPT_HTPROTO_IIR_FAC_FX 26689 // Q15 -#define LOG_10_BASE_2_Q29 1783446528 // Q29 -#define TAN_30_FX 17157 // Q15 -#define INV_TAN30_FX 28377 // Q14 -#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ + /* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */ +#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736 // 0.4f in Q30 +#define TWO_POINT_FIVE_IN_Q13 20480 // Q13 +#define ADAPT_HTPROTO_IIR_FAC_FX 26689 // Q15 +#define LOG_10_BASE_2_Q29 1783446528 // Q29 +#define TAN_30_FX 17157 // Q15 +#define INV_TAN30_FX 28377 // Q14 +#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) #define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */ #define ONE_DIV_EPSILON_EXP ( 40 ) +#define TEN_THOUSAND_Q17 1310720000 /* 10000.0 in Q17 */ +#define ONE_DIV_100000000_Q57 1441147784 /* 1.0 / (10000.0 * 10000.0) in Q57 */ #define SMALL_EIGENVALUE 50 @@ -116,9 +118,11 @@ static void getDirectPartGains_fx( const Word16 bin, Word16 aziDeg, Word16 eleDe static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaExtRend, COMBINED_ORIENTATION_HANDLE hCombinedOrientationData, Word32 *output_fx[] /*Q11*/, const Word16 subframe, const SPLIT_REND_WRAPPER *hSplitRendWrapper, Word32 Cldfb_Out_Real[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_Out_Imag[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] ); static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ ); -#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT -static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16* q_M ); -#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ + +#ifdef OPT_2182_MATRIX_SCALE_OPS +static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A ); +#endif + static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); #ifdef OPT_2182_MATRIX_SCALE_OPS @@ -1196,8 +1200,8 @@ static void ivas_dirac_dec_binaural_internal_fx( move16(); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11 - scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) ); + Scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11 + Scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) ); st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); } @@ -2353,22 +2357,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( CrCrossIm_fx = Mpy_32_32( CrCrossIm_fx, decorrelationReductionFactor_fx ); q_CrCross = sub( add( q_CrCross, q_decorrelationReductionFactor ), 31 ); -#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, 0, 0, /* Decorrelated signal has ideally no cross-terms */ Q31, CrEneL_fx, CrEneR_fx, q_CrEne, CrCrossRe_fx, CrCrossIm_fx, q_CrCross, prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14 -#else /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ - /* Determine a residual mixing matrix Mdec for processing the decorrelated signal to obtain - * the residual signal (that has the residual covariance matrix) - * Decorrelated signal has ideally no cross-terms */ - formulate2x2MixingMatrixNoCross_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, - CrEneL_fx, CrEneR_fx, q_CrEne, - CrCrossRe_fx, CrCrossIm_fx, q_CrCross, - prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec ); -#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */ } ELSE { @@ -3503,6 +3497,7 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } +#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT #ifdef NONBE_OPT_2193_EIG2X2 static Word32 eig2x2_div_fx( Word32 num, Word32 den ); @@ -3516,6 +3511,7 @@ static Word32 eig2x2_div_fx( Word32 num, Word32 den ) } #endif + static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ @@ -3880,15 +3876,15 @@ static void eig2x2_fx( q_e = add( q_E, exp ); /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) - a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx - pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - add_fx = 0.5f * (e1 + e2)*/ + a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx + pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) + add_fx = 0.5f * (e1 + e2)*/ IF( L_and( c_re == 0, c_im == 0 ) ) { /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 - a_fx = (E1 - E2)^2 - pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ + a_fx = (E1 - E2)^2 + pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ crossSquare_fx = 0; move32(); q_crossSquare = Q31; @@ -3904,7 +3900,7 @@ static void eig2x2_fx( IF( EQ_32( e1, e2 ) ) { /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx - pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ + pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); test(); IF( EQ_32( c_re, 0 ) || LT_32( L_abs( c_re ), ONE_IN_Q15 ) ) @@ -3931,8 +3927,8 @@ static void eig2x2_fx( ELSE { /* if e1, e2 >> c_re, c_im then (e1 - e2)^2 ~ (e1 - e2)^2 + 4 * crossSquare_fx - a_fx = (E1 - E2)^2 - pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ + a_fx = (E1 - E2)^2 + pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ IF( GT_16( sub( q_c, q_e ), Q15 ) ) { @@ -4211,6 +4207,7 @@ static void eig2x2_fx( #endif return; } +#endif static void matrixDiagMul_fx( Word32 reIn_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_In*/ @@ -4600,34 +4597,42 @@ static void matrixTransp2Mul_fx( return; } +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static Word32 Isqrt_newton_iter( Word32 val, Word32 isqrt_val, Word16 q_val, Word16 exp ) +{ + Word32 temp; + + /* newton iteration for y = 1 / sqrt(x) + * y = y * 2 * (0.75 - 0.25 * x * y * y) */ + + temp = Mpy_32_32( isqrt_val, isqrt_val ); + temp = Mpy_32_32( val, temp ); + exp = sub( Q31, sub( q_val, add( exp, exp ) ) ); + + temp = L_sub( 0x60000000, L_shr( temp, sub( 2, exp ) ) ); + temp = Mpy_32_32( isqrt_val, temp ); + if ( val ) + isqrt_val = L_shl( temp, 1 ); + + return ( isqrt_val ); +} static void chol2x2_fx( - const Word32 E1, /*q_E*/ - const Word32 E2, /*q_E*/ + Word32 E1, /*q_E*/ + Word32 E2, /*q_E*/ Word16 q_E, - const Word32 Cre, /*q_C*/ - const Word32 Cim, /*q_C*/ + Word32 Cre, /*q_C*/ + Word32 Cim, /*q_C*/ Word16 q_C, - Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ - Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ + Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */ + Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */ Word16 *q_out ) { Word16 chA, chB; - Word32 sqrtVal_fx, temp; - Word16 exp, q_re1, q_re2, q_re3, q_im, q_tmp; - Word32 e1, e2, c_re, c_im; - Word16 q_e, q_c; - - exp = sub( get_min_scalefactor( E1, E2 ), 1 ); - e1 = L_shl( E1, exp ); - e2 = L_shl( E2, exp ); - q_e = add( q_E, exp ); - - exp = sub( get_min_scalefactor( Cre, Cim ), 1 ); - c_re = L_shl( Cre, exp ); - c_im = L_shl( Cim, exp ); - q_c = add( q_C, exp ); - + Word32 sqrtVal, iSqrtVal; + Word16 q_sqrtVal, q_min; + Word16 exp = 0; + Word16 q_out_0_0, q_out_0_1, q_out_1_0, q_out_1_1; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4639,399 +4644,619 @@ static void chol2x2_fx( move32(); } } + exp = get_min_scalefactor( E1, E2 ); + E1 = L_shl( E1, exp ); + E2 = L_shl( E2, exp ); + q_E = add( q_E, exp ); - IF( GT_32( e1, e2 ) ) /* Perform Cholesky decomposition according to louder channel first */ - { - exp = sub( 31, q_e ); - outRe[0][0] = Sqrt32( e1, &exp ); - move32(); - q_re1 = sub( 31, exp ); - - // 4611686 = 1e-12 in Q62 - IF( outRe[0][0] == 0 ) - { - outRe[1][0] = BASOP_Util_Divide3232_Scale_newton( c_re, 4611686, &exp ); - move32(); - q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); + exp = get_min_scalefactor( Cre, Cim ); + Cre = L_shl( Cre, exp ); + Cim = L_shl( Cim, exp ); + q_C = add( q_C, exp ); - outIm[1][0] = BASOP_Util_Divide3232_Scale_newton( c_im, 4611686, &exp ); - move32(); - q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); - } - ELSE - { - Word32 denom; - Word16 den_exp; - Word32 my_outRe, my_outIm; + IF( GT_32( E1, E2 ) ) /* Perform Cholesky decomposition according to louder channel first */ + { + /* outRe[0][0] = sqrt(E1) */ + exp = sub( Q31, q_E ); + iSqrtVal = Isqrt_lc1( E1, &exp ); + iSqrtVal = Isqrt_newton_iter( E1, iSqrtVal, q_E, exp ); + outRe[0][0] = Mpy_32_32( E1, iSqrtVal ); + q_out_0_0 = sub( q_E, exp ); - /* Compute denom = 1.0 / outRe[0][0] */ - denom = ISqrt32( outRe[0][0], &exp ); - denom = Mpy_32_32( denom, denom ); - den_exp = shl( exp, 1 ); + /* outRe[1][0] = Cre / (outRe[0][0]);*/ + q_out_1_0 = Q31; + move16(); + outRe[1][0] = Mpy_32_32( Cre, iSqrtVal ); + if ( Cre != 0 ) + q_out_1_0 = sub( q_C, exp ); - /* Normalise c_re, c_im */ - exp = norm_l( c_re ); - my_outRe = L_shl( c_re, exp ); - q_re2 = add( q_c, exp ); - exp = norm_l( c_im ); - my_outIm = L_shl( c_im, exp ); - q_im = add( q_c, exp ); + /* outIm[1][0] = Cim / (outRe[0][0]); */ + move16(); + outIm[1][0] = Mpy_32_32( Cim, iSqrtVal ); + if ( Cim != 0 ) + q_out_1_0 = sub( q_C, exp ); - /* Multiply and store c_re*denom and c_im*denom */ - outRe[1][0] = Mpy_32_32( denom, my_outRe ); - move32(); - q_re2 = sub( q_re2, den_exp ); + /* outRe[1][1] = sqrt(max(0.0f, E2 - (Cre * Cre + Cim * Cim) / (E1))) */ + sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); - outIm[1][0] = Mpy_32_32( denom, my_outIm ); - move32(); - q_im = sub( q_im, den_exp ); - } - if ( outRe[1][0] == 0 ) - { - q_re2 = Q31; - move16(); - } - if ( outIm[1][0] == 0 ) + /* Special treatment for E1_norm = 0x40000000, Result is known: 2 * sqrtVal */ + IF( GT_32( E1, 0x40000000 ) ) { - q_im = Q31; - move16(); + sqrtVal = div_w_newton( sqrtVal, E1 ); } + q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E1_norm - 2 */ + q_min = s_min( q_E, q_sqrtVal ); + sqrtVal = L_sub( L_shl( E2, sub( q_min, q_E ) ), L_shl( sqrtVal, sub( q_min, q_sqrtVal ) ) ); - temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); - q_tmp = sub( add( q_c, q_c ), 31 ); + sqrtVal = L_max( 0, sqrtVal ); - // 4611686 = Q62 - IF( e1 == 0 ) - { - Word16 norm = norm_l( temp ); - temp = L_shl( temp, norm ); - q_tmp = add( q_tmp, norm ); - temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); - q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); - } - ELSE - { - temp = BASOP_Util_Divide3232_Scale_newton( temp, e1, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) ); - } - if ( temp == 0 ) + if ( sqrtVal == 0 ) { - q_tmp = Q31; + q_min = Q31; move16(); } - IF( LT_16( q_e, q_tmp ) ) - { - sqrtVal_fx = L_sub( e2, L_shr( temp, sub( q_tmp, q_e ) ) ); ////q_tmp - q_tmp = q_e; - move16(); - } - ELSE - { - sqrtVal_fx = L_sub( L_shr( e2, sub( q_e, q_tmp ) ), temp ); // q_tmp - } + /* normalize sqrtVal */ + exp = norm_l( sqrtVal ); + sqrtVal = L_shl( sqrtVal, exp ); + q_min = add( q_min, exp ); + exp = sub( Q31, q_min ); + iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); - exp = sub( 31, q_tmp ); - outRe[1][1] = Sqrt32( L_max( 0, sqrtVal_fx ), &exp ); - move32(); - q_re3 = sub( 31, exp ); + outRe[1][1] = Mpy_32_32( sqrtVal, iSqrtVal ); + q_out_1_1 = sub( q_min, exp ); - *q_out = s_min( s_min( q_re1, q_re2 ), s_min( q_re3, q_im ) ); - outRe[0][0] = L_shr( outRe[0][0], sub( q_re1, *q_out ) ); + /* normalize q_out */ + *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_1_0 ); + *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ + move16(); + outRe[1][1] = L_shr( outRe[1][1], sub( q_out_1_1, *q_out ) ); move32(); - outRe[1][0] = L_shr( outRe[1][0], sub( q_re2, *q_out ) ); + outRe[1][0] = L_shr( outRe[1][0], sub( q_out_1_0, *q_out ) ); move32(); - outIm[1][0] = L_shr( outIm[1][0], sub( q_im, *q_out ) ); + outIm[1][0] = L_shr( outIm[1][0], sub( q_out_1_0, *q_out ) ); move32(); - outRe[1][1] = L_shr( outRe[1][1], sub( q_re3, *q_out ) ); + outRe[0][0] = L_shr( outRe[0][0], sub( q_out_0_0, *q_out ) ); move32(); } ELSE { - exp = sub( 31, q_e ); - outRe[1][1] = Sqrt32( e2, &exp ); - move32(); - q_re1 = sub( 31, exp ); + /* outRe[1][1] = sqrt(E2) */ + exp = sub( Q31, q_E ); + iSqrtVal = Isqrt_lc1( E2, &exp ); + outRe[1][1] = Mpy_32_32( E2, iSqrtVal ); + q_out_1_1 = sub( q_E, exp ); - // 4611686 = Q62 - IF( outRe[1][1] == 0 ) - { - // outRe[0][1] = BASOP_Util_Divide3232_Scale_newton( c_re, 4611686, &exp ); - Word32 tmp1 = 1953125005; /* 1/4611686 Q62 */ - exp = 9; + /* outRe[0][1] = Cre / (outRe[1][1]);*/ + q_out_0_1 = Q31; + move16(); + outRe[0][1] = Mpy_32_32( Cre, iSqrtVal ); + if ( Cre != 0 ) + q_out_0_1 = sub( q_C, exp ); - outRe[0][1] = Mpy_32_32( tmp1, c_re ); - move32(); - q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); + /* outIm[0][1] = Cim / (outRe[1][1]); */ + outIm[0][1] = L_negate( Mpy_32_32( Cim, iSqrtVal ) ); + if ( Cim != 0 ) + q_out_0_1 = sub( q_C, exp ); - // outIm[0][1] = BASOP_Util_Divide3232_Scale_newton( -c_im, 4611686, &exp ); - outIm[0][1] = Mpy_32_32( tmp1, -c_im ); - move32(); - q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); - } - ELSE - { - { - // outRe[0][1] = BASOP_Util_Divide3232_Scale_newton( c_re, outRe[1][1], &exp ); - Word32 tmp1 = BASOP_Util_Divide3232_Scale_newton( 0x7FFFFFFF, outRe[1][1], &exp ); - outRe[0][1] = Mpy_32_32( tmp1, c_re ); - move32(); - q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); + /* outRe[0][0] = sqrt(max(0.0f, E1 - (Cre * Cre + Cim * Cim) / (E2))) */ + sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); /* q_sqrtVal = 2 * q_C - 1 */ - // outIm[0][1] = BASOP_Util_Divide3232_Scale_newton( -c_im, outRe[1][1], &exp ); - outIm[0][1] = Mpy_32_32( tmp1, -c_im ); - move32(); - q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); - } - } - if ( outRe[0][1] == 0 ) - { - q_re2 = Q31; - move16(); - } - if ( outIm[0][1] == 0 ) + /* Special treatment for E2_norm = 0x40000000, Result is known: 2 * sqrtVal */ + IF( GT_32( E2, 0x40000000 ) ) { - q_im = Q31; - move16(); + sqrtVal = div_w_newton( sqrtVal, E2 ); } + q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E2_norm - 2 */ + q_min = s_min( q_E, q_sqrtVal ); + sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) ); - temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); - q_tmp = sub( add( q_c, q_c ), 31 ); + sqrtVal = L_max( sqrtVal, 0 ); - // 4611686 = 1e-12 in Q62 - IF( e2 == 0 ) - { - temp = BASOP_Util_Divide3232_Scale_newton( temp, 4611686, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); - } - ELSE - { - temp = BASOP_Util_Divide3232_Scale_newton( temp, e2, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) ); - } - if ( temp == 0 ) + if ( sqrtVal == 0 ) { - q_tmp = Q31; + q_min = Q31; move16(); } - IF( LT_16( q_e, q_tmp ) ) - { - sqrtVal_fx = L_sub( e1, L_shr( temp, sub( q_tmp, q_e ) ) ); ////q_tmp - q_tmp = q_e; - move16(); - } - ELSE - { - sqrtVal_fx = L_sub( L_shr( e1, sub( q_e, q_tmp ) ), temp ); ////q_tmp - } + /* normalize sqrtVal */ + exp = norm_l( sqrtVal ); + sqrtVal = L_shl( sqrtVal, exp ); + q_min = add( q_min, exp ); + exp = sub( Q31, q_min ); + iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); - exp = sub( 31, q_tmp ); - outRe[0][0] = Sqrt32( L_max( 0, sqrtVal_fx ), &exp ); - move32(); - q_re3 = sub( 31, exp ); + outRe[0][0] = Mpy_32_32( sqrtVal, iSqrtVal ); + q_out_0_0 = sub( q_min, exp ); - *q_out = s_min( s_min( q_re1, q_re2 ), s_min( q_re3, q_im ) ); + /* normalize q_out */ + *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_0_1 ); + *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ move16(); - outRe[1][1] = L_shr( outRe[1][1], sub( q_re1, *q_out ) ); + outRe[1][1] = L_shr( outRe[1][1], sub( q_out_1_1, *q_out ) ); move32(); - outRe[0][1] = L_shr( outRe[0][1], sub( q_re2, *q_out ) ); + outRe[0][1] = L_shr( outRe[0][1], sub( q_out_0_1, *q_out ) ); move32(); - outIm[0][1] = L_shr( outIm[0][1], sub( q_im, *q_out ) ); + outIm[0][1] = L_shr( outIm[0][1], sub( q_out_0_1, *q_out ) ); move32(); - outRe[0][0] = L_shr( outRe[0][0], sub( q_re3, *q_out ) ); + outRe[0][0] = L_shr( outRe[0][0], sub( q_out_0_0, *q_out ) ); move32(); } - return; } -#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT -static void formulate2x2MixingMatrixNoCross_fx( - Word32 Ein1_fx /*q_Ein*/, - Word32 Ein2_fx /*q_Ein*/, - Word16 q_Ein, - Word32 Eout1_fx /*q_Eout*/, - Word32 Eout2_fx /*q_Eout*/, - Word16 q_Eout, - Word32 CoutRe_fx /*q_Cout*/, - Word32 CoutIm_fx /*q_Cout*/, - Word16 q_Cout, - Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, - Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, - Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, - Word16 *q_M ) +static void eig2x2_fx( + Word32 E1_fx, /*q_E*/ + Word32 E2_fx, /*q_E*/ + Word16 q_E, + Word32 Cre_fx, /*q_C*/ + Word32 Cim_fx, /*q_C*/ + Word16 q_C, + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ + Word16 *q_U, + Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ + Word16 *q_D ) { - /* - This function implements a 2x2 solution for an optimized spatial audio rendering algorithm, based on - Vilkamo, J., Bäckström, T. and Kuntz, A., 2013. - "Optimized covariance domain framework for time–frequency processing of spatial audio." - Journal of the Audio Engineering Society, 61(6), pp.403-411. - but optimized for decorrelated signals + Word16 chA, chB, ch; - The result of the formulas below are the same as those in the publication, however, some - derivation details differ for as simple as possible 2x2 formulation - */ - Word16 chA, chB; - Word32 maxEne_fx, tmp, maxEneDiv_fx; - Word16 q_maxEne, q_maxEneDiv, exp, exp1; - Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word32 D_fx[BINAURAL_CHANNELS]; - Word32 div_fx[BINAURAL_CHANNELS]; - Word32 Ghat_fx[BINAURAL_CHANNELS]; - Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word16 q_ky, q_A, q_U, q_D, q_P; - Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im; - Word16 q_ein, q_eout, q_cout, q_Ghat, q_GhatQ, q_temp, q_div, exp_temp; - Word32 temp; - Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); - set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); - set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); - set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + Word32 add_fx, sub_fx, pm_fx, isqrt_fx, s1_fx, s2_fx, normVal_fx, normVal_isqrt_fx, temp_fx; + Word64 crossSquare_fx, a_fx; + Word16 q_E2, q_C2, q_a, q_min, exp, q_pm, q_s, q_s2, q_norm; - q_ky = 0; - move16(); + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Ure_fx[chA][chB] = 0; + move32(); + Uim_fx[chA][chB] = 0; + move32(); + } + } - exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 ); - E_in1 = L_shl( Ein1_fx, exp ); - E_in2 = L_shl( Ein2_fx, exp ); - q_ein = add( q_Ein, exp ); + exp = get_min_scalefactor( E1_fx, E2_fx ); + E1_fx = L_shl( E1_fx, exp ); + E2_fx = L_shl( E2_fx, exp ); + q_E = add( q_E, exp ); + q_E2 = add( q_E, q_E ); - exp = sub( get_min_scalefactor( Eout1_fx, Eout2_fx ), 1 ); - E_out1 = L_shl( Eout1_fx, exp ); - E_out2 = L_shl( Eout2_fx, exp ); - q_eout = add( q_Eout, exp ); + exp = get_min_scalefactor( Cre_fx, Cim_fx ); + Cre_fx = L_shl( Cre_fx, exp ); + Cim_fx = L_shl( Cim_fx, exp ); + q_C = add( q_C, exp ); + q_C2 = add( q_C, q_C ); - exp = sub( get_min_scalefactor( CoutRe_fx, CoutIm_fx ), 1 ); - Cout_re = L_shl( CoutRe_fx, exp ); - Cout_im = L_shl( CoutIm_fx, exp ); - q_cout = add( q_Cout, exp ); + add_fx = W_extract_l( W_shr( W_add( W_deposit32_l( E1_fx ), W_deposit32_l( E2_fx ) ), 1 ) ); /* 0.5 * ( E1 + E2 ) */ + sub_fx = L_sub( E1_fx, E2_fx ); - /* Normalize energy values */ - maxEne_fx = L_max( E_in1, E_in2 ); - q_maxEne = q_ein; + /* + * crossSquare = (Cre * Cre) + (Cim * Cim) + */ + crossSquare_fx = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); /* 2 * q_C */ + + /* + * a = (E1 + E2) * (E1 + E2) - 4.0f * ((E1 * E2) - crossSquare) + * a = (E1 - E2) ^ 2 + 4 * crossSquare + */ + + a_fx = W_mult0_32_32( sub_fx, sub_fx ); /* 2 * q_E */ + + q_C2 = add( q_C, q_C ); + q_a = sub( s_min( q_E2, q_C2 ), 2 ); + a_fx = W_add( W_shr( a_fx, sub( q_E2, q_a ) ), W_shr( crossSquare_fx, sub( q_C2, add( q_a, 2 ) ) ) ); + + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + q_a = add( q_a, exp ); + pm_fx = W_extract_h( a_fx ); + pm_fx = L_max( 0, pm_fx ); + q_pm = sub( q_a, Q31 + 1 ); + + /* + * add = 0.5f * ( E1 + E2 ) + * pm = 0.5f * sqrtf( max( 0.0f, a ) ) + * add = 0.5f * ( E1 + E2 ) + * D[0] = add + pm + * D[1] = max( 0.0f, add - pm ) + */ + + q_min = q_E; move16(); - tmp = L_max( E_out1, E_out2 ); - IF( LT_16( q_maxEne, q_eout ) ) + IF( GT_32( pm_fx, 0 ) ) { - maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne + /* pm_fx is normalized */ + exp = sub( Q31, q_pm ); + isqrt_fx = Isqrt_lc1( pm_fx, &exp ); + isqrt_fx = Isqrt_newton_iter( pm_fx, isqrt_fx, q_pm, exp ); + + pm_fx = Mpy_32_32( pm_fx, isqrt_fx ); + q_pm = sub( q_pm, exp ); + q_pm = add( q_pm, 1 ); /* 0.5 * sqrt( pm ) */ + + /* normalize */ + q_min = s_min( q_pm, q_E ); + q_min = sub( q_min, 1 ); + add_fx = L_shr( add_fx, q_E - q_min ); + pm_fx = L_shr( pm_fx, q_pm - q_min ); } - ELSE + + + temp_fx = L_abs( L_sub( add_fx, pm_fx ) ); + + if ( LT_32( temp_fx, ONE_IN_Q7 ) ) { - maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne - q_maxEne = q_eout; - move16(); + pm_fx = add_fx; + move32(); } - // 4611686 = Q62 - IF( maxEne_fx == 0 ) + D_fx[0] = L_add( add_fx, pm_fx ); + D_fx[1] = L_max( 0, L_sub( add_fx, pm_fx ) ); + + *q_D = q_min; + move16(); + + *q_U = Q30; + move16(); + /* Numeric case, when input is practically zeros */ + IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { - maxEneDiv_fx = ONE_DIV_EPSILON_MANT; - move32(); - q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; - move16(); + Ure_fx[0][0] = ONE_IN_Q30; + Ure_fx[1][1] = ONE_IN_Q30; + return; } - ELSE + + /* Numeric case, when input is near an identity matrix with a gain */ + IF( LT_32( pm_fx, Mpy_32_32( INV_1000_Q31, add_fx ) ) ) { - maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp ); - q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); + Ure_fx[0][0] = ONE_IN_Q30; + Ure_fx[1][1] = ONE_IN_Q30; + return; } - exp = norm_l( maxEneDiv_fx ); - maxEneDiv_fx = L_shl( maxEneDiv_fx, exp ); - q_maxEneDiv = add( q_maxEneDiv, exp ); - E_in1 = Mpy_32_32( E_in1, maxEneDiv_fx ); - E_in2 = Mpy_32_32( E_in2, maxEneDiv_fx ); - q_ein = sub( add( q_ein, q_maxEneDiv ), 31 ); + /* Eigenvectors */ + q_s = s_min( *q_D, q_E ); + q_s2 = add( q_s, q_s ); + FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + { + s1_fx = L_sub( L_shr( D_fx[ch], *q_D - q_s ), L_shr( E1_fx, q_E - q_s ) ); + s2_fx = L_sub( L_shr( D_fx[ch], *q_D - q_s ), L_shr( E2_fx, q_E - q_s ) ); - E_out1 = Mpy_32_32( E_out1, maxEneDiv_fx ); - E_out2 = Mpy_32_32( E_out2, maxEneDiv_fx ); - q_eout = sub( add( q_eout, q_maxEneDiv ), 31 ); + IF( L_abs( s2_fx ) > L_abs( s1_fx ) ) + { + a_fx = W_mult0_32_32( s2_fx, s2_fx ); /* 2*q_s */ + q_min = s_min( q_s2, q_C2 ); + a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); + a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ - Cout_re = Mpy_32_32( Cout_re, maxEneDiv_fx ); - Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx ); - q_cout = sub( add( q_cout, q_maxEneDiv ), 31 ); + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + normVal_fx = W_extract_h( a_fx ); + q_norm = sub( add( q_min, exp ), Q31 + 1 ); - /* Cholesky decomposition of target / output covariance matrix */ - chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + /* normVal_fx is normalized */ + exp = sub( Q31, q_norm ); + normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); + normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); - /* If there are no cross-terms, the Eigendecomposition of input covariance matrix - can be skipped. Uxre is a unit matrix, Uxim is a zero matrix and Sx is (1, 1) - Further on, also Kxre is a unit matrix and Kxim is a zero matrix - Multiplication with these matrices / scalars can be skipped - */ + q_norm = add( Q31, exp ); - temp = Mpy_32_32( E_in2, INV_1000_Q31 ); - temp = L_max( temp, E_in1 ); + a_fx = W_mult0_32_32( s2_fx, normVal_isqrt_fx ); /* q_s + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ + Ure_fx[0][ch] = W_extract_h( a_fx ); /* Q30 */ - IF( temp == 0 ) - { - IF( E_out1 == 0 ) - { - Ghat_fx[0] = 0; - exp = -19; - move32(); - move16(); + a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Ure_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ + + a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Uim_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ } ELSE { - temp = BASOP_Util_Divide3232_Scale_newton( E_out1, 4611686, &exp ); // 4611686 = Q62 - exp = sub( exp, sub( q_eout, 62 ) ); - Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + a_fx = W_mult0_32_32( s1_fx, s1_fx ); // 2*q_s + q_min = s_min( q_s2, q_C2 ); + a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); + a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + normVal_fx = W_extract_h( a_fx ); + q_norm = sub( add( q_min, exp ), Q31 + 1 ); + + /* normVal_fx is normalized */ + exp = sub( Q31, q_norm ); + normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); + normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); + + q_norm = add( Q31, exp ); + + a_fx = W_mult0_32_32( s1_fx, normVal_isqrt_fx ); /* q_s + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ + Ure_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ + + a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Ure_fx[0][ch] = W_extract_h( a_fx ); /* Q30 */ + + a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Uim_fx[0][ch] = L_negate( W_extract_h( a_fx ) ); /* Q30 */ } } +} + + +static void formulate2x2MixingMatrix_fx( + Word32 Ein1_fx, /*q_Ein*/ + Word32 Ein2_fx, /*q_Ein*/ + Word16 q_Ein, + Word32 CinRe_fx, /*q_Cin*/ + Word32 CinIm_fx, /*q_Cin*/ + Word16 q_Cin, + Word32 Eout1_fx, /*q_Eout*/ + Word32 Eout2_fx, /*q_Eout*/ + Word16 q_Eout, + Word32 CoutRe_fx, /*q_Cout*/ + Word32 CoutIm_fx, /*q_Cout*/ + Word16 q_Cout, + Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* Q31 */ + Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */ + Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */ + Word16 *q_M, + const Word16 regularizationFactor_fx /* Q14 */ ) +{ + /* + This function implements a 2x2 solution for an optimized spatial audio rendering algorithm + Vilkamo, J., Bäckström, T. and Kuntz, A., 2013. + "Optimized covariance domain framework for time–frequency processing of spatial audio." + Journal of the Audio Engineering Society, 61(6), pp.403-411. + + The result of the formulas below are the same as those in the publication, however, some + derivation details differ for as simple as possible 2x2 formulattion + */ + Word16 chA, chB; + Word32 maxEneIn_fx, maxEneOut_fx, maxEne_fx, maxEneDiv_fx; + Word16 q_maxEne, q_maxEneDiv, exp; + Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Uxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Sx_fx[BINAURAL_CHANNELS], Sx_Inv_fx[BINAURAL_CHANNELS], Kxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Kxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 D_fx[BINAURAL_CHANNELS], D_fx_norm; + Word32 div_fx[BINAURAL_CHANNELS]; + Word32 Ghat_fx[BINAURAL_CHANNELS]; + Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 q_ky, q_Sx, q_Ux, q_Kx, q_A, q_U, q_D, q_P, q_Sx_Inv, q_Sx_0, q_Sx_1, q_Sx_Inv_0, q_Sx_Inv_1; + Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im, Cin_re, Cin_im; + Word16 q_ein, q_eout, q_cin, q_cout, q_Ghat, q_Ghat0, q_Ghat1, q_GhatQ, q_temp, q_div_min, q_div[2], norm_div[2]; + Word32 denom; + Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); + q_ky = 0; + move16(); + q_Sx = 0; + move16(); + q_Ux = 0; + move16(); + q_Kx = 0; + move16(); + + q_ein = q_Ein; + move16(); + q_eout = q_Eout; + move16(); + q_cin = q_Cin; + move16(); + q_cout = q_Cout; + move16(); + + /* Normalize energy values */ + maxEneIn_fx = L_max( Ein1_fx, Ein2_fx ); + move16(); + + maxEneOut_fx = L_max( Eout1_fx, Eout2_fx ); + move16(); + + IF( maxEneIn_fx == 0 || maxEneOut_fx == 0 ) + { + Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0; + move32(); + move32(); + move32(); + move32(); + Mim_fx[0][0] = Mim_fx[0][1] = Mim_fx[1][0] = Mim_fx[1][1] = 0; + move32(); + move32(); + move32(); + move32(); + *q_M = Q31; + move16(); + return; + } + + IF( LT_16( q_eout, q_ein ) ) + { + maxEne_fx = L_max( maxEneOut_fx, L_shr( maxEneIn_fx, sub( q_ein, q_eout ) ) ); + q_maxEne = q_eout; + move16(); + } ELSE { - temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + maxEne_fx = L_max( maxEneIn_fx, L_shr( maxEneOut_fx, sub( q_eout, q_ein ) ) ); + q_maxEne = q_ein; + move16(); + } - temp = BASOP_Util_Divide3232_Scale_newton( E_out1, temp, &exp ); - exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); - Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + exp = norm_l( maxEne_fx ); + maxEne_fx = L_shl( maxEne_fx, exp ); + + maxEneDiv_fx = ONE_IN_Q31; + move32(); + + /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ + IF( GT_32( maxEne_fx, 0x40000000 ) ) + { + maxEneDiv_fx = div_w_newton( ONE_IN_Q31, maxEne_fx ); } + + q_maxEneDiv = sub( sub( Q30, exp ), q_maxEne ); + + E_in1 = Mpy_32_32( Ein1_fx, maxEneDiv_fx ); + E_in2 = Mpy_32_32( Ein2_fx, maxEneDiv_fx ); + q_ein = add( q_ein, q_maxEneDiv ); + + Cin_re = Mpy_32_32( CinRe_fx, maxEneDiv_fx ); + Cin_im = Mpy_32_32( CinIm_fx, maxEneDiv_fx ); + q_cin = add( q_cin, q_maxEneDiv ); + + E_out1 = Mpy_32_32( Eout1_fx, maxEneDiv_fx ); + E_out2 = Mpy_32_32( Eout2_fx, maxEneDiv_fx ); + q_eout = add( q_eout, q_maxEneDiv ); + + Cout_re = Mpy_32_32( CoutRe_fx, maxEneDiv_fx ); + Cout_im = Mpy_32_32( CoutIm_fx, maxEneDiv_fx ); + q_cout = add( q_cout, q_maxEneDiv ); + + /* Cholesky decomposition of target / output covariance matrix */ + chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + /* Eigendecomposition of input covariance matrix */ + eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); + /* Eigendecomposition to Kx -- Ux Sx Ux' -> Kx Kx'*/ + + /* calculate sqrt(Sx_fx[0]) and 1.0 / sqrt(Sx_fx[0]) */ + exp = Q31; + move16(); + if ( Sx_fx[0] ) + exp = norm_l( Sx_fx[0] ); + Sx_fx[0] = L_shl( Sx_fx[0], exp ); + q_Sx_0 = add( q_Sx, exp ); + exp = sub( Q31, q_Sx_0 ); + Sx_Inv_fx[0] = Isqrt_lc1( Sx_fx[0], &exp ); + Sx_Inv_fx[0] = Isqrt_newton_iter( Sx_fx[0], Sx_Inv_fx[0], q_Sx_0, exp ); + q_Sx_Inv_0 = sub( Q31, exp ); + Sx_fx[0] = Mpy_32_32( Sx_fx[0], Sx_Inv_fx[0] ); + q_Sx_0 = sub( q_Sx_0, exp ); + + /* calculate sqrt(Sx_fx[1]) and 1.0 / sqrt(Sx_fx[1]) */ + exp = Q31; + move16(); + if ( Sx_fx[1] ) + exp = norm_l( Sx_fx[1] ); + Sx_fx[1] = L_shl( Sx_fx[1], exp ); + q_Sx_1 = add( q_Sx, exp ); + exp = sub( Q31, q_Sx_1 ); + Sx_Inv_fx[1] = Isqrt_lc1( Sx_fx[1], &exp ); + Sx_Inv_fx[1] = Isqrt_newton_iter( Sx_fx[1], Sx_Inv_fx[1], q_Sx_1, exp ); + q_Sx_Inv_1 = sub( Q31, exp ); + Sx_fx[1] = Mpy_32_32( Sx_fx[1], Sx_Inv_fx[1] ); + q_Sx_1 = sub( q_Sx_1, exp ); + + /* normalization */ + q_Sx = s_min( q_Sx_0, q_Sx_1 ); + Sx_fx[0] = L_shr( Sx_fx[0], q_Sx_0 - q_Sx ); + Sx_fx[1] = L_shr( Sx_fx[1], q_Sx_1 - q_Sx ); + + matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx ); + + /* Regularize the diagonal Sx for matrix inversion + Sx_fx[0] = max( Sx_fx[0], regularizationFactor * Sx_fx[1] ) + Sx_fx[1] = max( Sx_fx[1], regularizationFactor * Sx_fx[0] ) + TBD: use the isqrts from above, multiplied by 1/ regularizationFactor */ + + Sx_fx[0] = L_max( Sx_fx[0], L_shl_sat( Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ), 1 ) ); + Sx_fx[1] = L_max( Sx_fx[1], L_shl_sat( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); + move32(); move32(); - temp = Mpy_32_32( E_in1, 2147484 ); - temp = L_max( temp, E_in2 ); // q_ein - IF( temp == 0 ) + q_Sx_Inv = add( Q30, sub( Q31, q_Sx ) ); + + Sx_Inv_fx[0] = ONE_IN_Q31; + move32(); + q_Sx_Inv_0 = Q30; + move16(); + + exp = norm_l( Sx_fx[0] ); + Sx_fx[0] = L_shl( Sx_fx[0], exp ); + + /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ + IF( GT_32( Sx_fx[0], 0x40000000 ) ) { - IF( E_out2 == 0 ) - { /* We can set hard-coded results */ - Ghat_fx[1] = 0; - exp1 = -19; - move16(); - } - ELSE - { - temp = BASOP_Util_Divide3232_Scale_newton( E_out2, 4611686, &exp1 ); // 4611686 = Q62 - exp1 = sub( exp1, sub( q_eout, 62 ) ); - Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 - } + Sx_Inv_fx[0] = div_w_newton( ONE_IN_Q31, Sx_fx[0] ); } - ELSE + q_Sx_Inv_0 = sub( q_Sx_Inv, exp ); + + Sx_Inv_fx[1] = ONE_IN_Q31; + move32(); + q_Sx_Inv_1 = Q30; + move16(); + + exp = norm_l( Sx_fx[1] ); + Sx_fx[1] = L_shl( Sx_fx[1], exp ); + + /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ + IF( GT_32( Sx_fx[1], 0x40000000 ) ) { - temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + Sx_Inv_fx[1] = div_w_newton( ONE_IN_Q31, Sx_fx[1] ); + } + q_Sx_Inv_1 = sub( q_Sx_Inv, exp ); - temp = BASOP_Util_Divide3232_Scale_newton( E_out2, temp, &exp1 ); - exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); - Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + /* normalization */ + q_Sx_Inv = s_min( q_Sx_Inv_0, q_Sx_Inv_1 ); + Sx_Inv_fx[0] = L_shr( Sx_Inv_fx[0], q_Sx_Inv_0 - q_Sx_Inv ); + Sx_Inv_fx[1] = L_shr( Sx_Inv_fx[1], q_Sx_Inv_1 - q_Sx_Inv ); + + /* This is equivalent to the prototype signal energy normalization in the publication + * Ghat[0] = sqrt(Eout1 / max(Ein1, 0.001f * Ein2)) + * Ghat[1] = sqrt(Eout2 / max(Ein2, 0.001f * Ein1)) */ + denom = L_max( E_in1, Mpy_32_32( E_in2, INV_1000_Q31 ) ); + exp = norm_l( denom ); + denom = L_shl( denom, exp ); + + Ghat_fx[0] = E_out1; + move32(); + + /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out1 */ + IF( GT_32( denom, 0x40000000 ) ) + { + Ghat_fx[0] = div_w_newton( E_out1, denom ); } + exp = add( add( exp, 1 ), sub( q_ein, q_eout ) ); + Ghat_fx[0] = Sqrt32( Ghat_fx[0], &exp ); + q_Ghat0 = sub( Q31, exp ); + + denom = L_max( E_in2, Mpy_32_32( E_in1, INV_1000_Q31 ) ); + exp = norm_l( denom ); + denom = L_shl( denom, exp ); + + Ghat_fx[1] = E_out2; move32(); - q_Ghat = sub( 31, s_max( exp, exp1 ) ); - Word16 q_diff = sub( 31, q_Ghat ); - Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_diff, exp ) ); // q_Ghat + /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out2 */ + IF( GT_32( denom, 0x40000000 ) ) + { + Ghat_fx[1] = div_w_newton( E_out2, denom ); + } + exp = add( add( exp, 1 ), sub( q_ein, q_eout ) ); + Ghat_fx[1] = Sqrt32( Ghat_fx[1], &exp ); + q_Ghat1 = sub( Q31, exp ); + + /* normalize q_Ghat */ + q_Ghat = s_min( q_Ghat0, q_Ghat1 ); + + Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_Ghat0, q_Ghat ) ); move32(); - Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_diff, exp1 ) ); // q_Ghat + Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_Ghat1, q_Ghat ) ); move32(); - /* Matrix multiplication, A = Ky' * G_hat * Q */ + /* Matrix multiplication, tmp = Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] ); @@ -5041,154 +5266,397 @@ static void formulate2x2MixingMatrixNoCross_fx( } q_GhatQ = sub( add( Q31, q_Ghat ), 31 ); - exp = sub( s_min( L_norm_arr( KyRe_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ), L_norm_arr( KyIm_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ) ), 1 ); - scale_sig32( KyRe_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp ); - scale_sig32( KyIm_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp ); - q_ky = add( q_ky, exp ); + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] ); + tmpIm_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] ); + move32(); + move32(); + } + } + + q_temp = sub( add( q_ky, q_GhatQ ), 31 ); +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); +#endif + + /* A = Ky' * G_hat * Q * Kx (see publication) */ + matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, + Kxre_fx, Kxim_fx, &q_Kx, /* Kxre_fx, Kxim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Are_fx, Aim_fx, &q_A ); + + /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx + For matrix A that is P = A(A'A)^0.5 */ + matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + + eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + + div_fx[0] = TEN_THOUSAND_Q17; + move32(); + q_div[0] = Q17; + move16(); + + exp = norm_l( D_fx[0] ); + D_fx_norm = L_shl( D_fx[0], exp ); + exp = add( q_D, exp ); + + /* check if D_fx[0] > 1.0 / (10000.0 * 10000.0) */ + IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) ) + { + exp = sub( 31, exp ); + div_fx[0] = Isqrt_lc1( D_fx_norm, &exp ); + q_div[0] = sub( Q31, exp ); + } + + div_fx[1] = TEN_THOUSAND_Q17; + move32(); + q_div[1] = Q17; + move16(); + + exp = norm_l( D_fx[1] ); + D_fx_norm = L_shl( D_fx[1], exp ); + exp = add( q_D, exp ); + + /* check if D_fx[1] > 1.0 / (10000.0 * 10000.0) */ + IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) ) + { + exp = sub( Q31, exp ); + div_fx[1] = Isqrt_lc1( D_fx_norm, &exp ); + q_div[1] = sub( Q31, exp ); + } +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( Are_fx, Aim_fx, &q_A ); +#endif + matrixMul_fx( Are_fx, Aim_fx, &q_A, + Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + tmpRe_fx, tmpIm_fx, &q_temp ); + + norm_div[0] = Q31; + move16(); + norm_div[1] = Q31; + move16(); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = Mpy_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); + exp = norm_l( tmpRe_fx[chA][chB] ); + if ( tmpRe_fx[chA][chB] ) + norm_div[chB] = s_min( norm_div[chB], exp ); + + tmpIm_fx[chA][chB] = Mpy_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); + exp = norm_l( tmpIm_fx[chA][chB] ); + if ( tmpIm_fx[chA][chB] ) + norm_div[chB] = s_min( norm_div[chB], exp ); + } + } + + /* normalize */ + q_div_min = s_min( add( q_div[0], norm_div[0] ), add( q_div[1], norm_div[1] ) ); + q_div_min = sub( q_div_min, 1 ); /* 1 bit headroom */ + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], q_div[chB] - q_div_min ); + tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], q_div[chB] - q_div_min ); + } + } + q_temp = sub( add( q_temp, q_div_min ), Q31 ); + +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixTransp2Mul_fx( + tmpRe_fx, tmpIm_fx, &q_temp, /* tmpRe_fx, tmpIm_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ +#else + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, + 0 /*int Ascale*/, + 0 /*int Bscale*/, + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ +#endif + + /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ + + for ( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + for ( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], Sx_Inv_fx[chB] ); + Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], Sx_Inv_fx[chB] ); + } + } + q_P = sub( q_P, sub( Q31, q_Sx_Inv ) ); + + matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, /* KyRe_fx, KyIm_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Pre_fx, Pim_fx, &q_P, /* Pre_fx, Pim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + tmpRe_fx, tmpIm_fx, &q_temp ); + + +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); + matrixTransp2Mul_fx( + tmpRe_fx, tmpIm_fx, &q_temp, + Uxre_fx, Uxim_fx, &q_Ux, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ + Mre_fx, Mim_fx, q_M ); +#else + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, + 1 /*int Ascale*/, + 0 /*int Bscale*/, + Mre_fx, Mim_fx, q_M ); +#endif + return; +} + +#else +static void chol2x2_fx( + const Word32 E1, /*q_E*/ + const Word32 E2, /*q_E*/ + Word16 q_E, + const Word32 Cre, /*q_C*/ + const Word32 Cim, /*q_C*/ + Word16 q_C, + Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ + Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ + Word16 *q_out ) +{ + Word16 chA, chB; + Word32 sqrtVal_fx, temp; + Word16 exp, q_re1, q_re2, q_re3, q_im, q_tmp; + Word32 e1, e2, c_re, c_im; + Word16 q_e, q_c; + + exp = sub( get_min_scalefactor( E1, E2 ), 1 ); + e1 = L_shl( E1, exp ); + e2 = L_shl( E2, exp ); + q_e = add( q_E, exp ); + + exp = sub( get_min_scalefactor( Cre, Cim ), 1 ); + c_re = L_shl( Cre, exp ); + c_im = L_shl( Cim, exp ); + q_c = add( q_C, exp ); + + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + outRe[chA][chB] = 0; + move32(); + outIm[chA][chB] = 0; + move32(); + } + } + + IF( GT_32( e1, e2 ) ) /* Perform Cholesky decomposition according to louder channel first */ + { + exp = sub( 31, q_e ); + outRe[0][0] = Sqrt32( e1, &exp ); + move32(); + q_re1 = sub( 31, exp ); + + // 4611686 = 1e-12 in Q62 + IF( outRe[0][0] == 0 ) + { + outRe[1][0] = BASOP_Util_Divide3232_Scale_newton( c_re, 4611686, &exp ); + move32(); + q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); + + outIm[1][0] = BASOP_Util_Divide3232_Scale_newton( c_im, 4611686, &exp ); + move32(); + q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); + } + ELSE + { + Word32 denom; + Word16 den_exp; + Word32 my_outRe, my_outIm; + + /* Compute denom = 1.0 / outRe[0][0] */ + denom = ISqrt32( outRe[0][0], &exp ); + denom = Mpy_32_32( denom, denom ); + den_exp = shl( exp, 1 ); + + /* Normalise c_re, c_im */ + exp = norm_l( c_re ); + my_outRe = L_shl( c_re, exp ); + q_re2 = add( q_c, exp ); + exp = norm_l( c_im ); + my_outIm = L_shl( c_im, exp ); + q_im = add( q_c, exp ); - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) - { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) - { - Are_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] ); - Aim_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] ); + /* Multiply and store c_re*denom and c_im*denom */ + outRe[1][0] = Mpy_32_32( denom, my_outRe ); move32(); + q_re2 = sub( q_re2, den_exp ); + + outIm[1][0] = Mpy_32_32( denom, my_outIm ); move32(); + q_im = sub( q_im, den_exp ); + } + if ( outRe[1][0] == 0 ) + { + q_re2 = Q31; + move16(); + } + if ( outIm[1][0] == 0 ) + { + q_im = Q31; + move16(); } - } - - q_A = sub( add( q_ky, q_GhatQ ), 31 ); // TODO SMM: Check if this is correct!!! - move16(); - /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx - For matrix A that is P = A(A'A)^0.5 */ - matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); + q_tmp = sub( add( q_c, q_c ), 31 ); - eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + // 4611686 = Q62 + IF( e1 == 0 ) + { + Word16 norm = norm_l( temp ); + temp = L_shl( temp, norm ); + q_tmp = add( q_tmp, norm ); + temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); + q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_newton( temp, e1, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) ); + } + if ( temp == 0 ) + { + q_tmp = Q31; + move16(); + } - IF( D_fx[0] != 0 && D_fx[1] == 0 ) // Due to an eig2x2 error, sometimes D_fx[1] becomes zero, which implies that the input matrix should be singular (i.e., determinant = 0). - { - Word32 det_fx = L_sub_sat( Mult_32_32( tmpRe_fx[0][0], tmpRe_fx[1][1] ), - L_add_sat( Mult_32_32( tmpRe_fx[1][0], tmpRe_fx[1][0] ), - Mult_32_32( tmpIm_fx[1][0], tmpIm_fx[1][0] ) ) ); - if ( det_fx != 0 ) + IF( LT_16( q_e, q_tmp ) ) { - D_fx[1] = SMALL_EIGENVALUE; // Setting D_fx[1] to epsilon has no effect, as the value is too small to affect the output. - move32(); + sqrtVal_fx = L_sub( e2, L_shr( temp, sub( q_tmp, q_e ) ) ); ////q_tmp + q_tmp = q_e; + move16(); + } + ELSE + { + sqrtVal_fx = L_sub( L_shr( e2, sub( q_e, q_tmp ) ), temp ); // q_tmp } - } - IF( D_fx[0] == 0 ) - { - temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp = sub( 31, q_tmp ); + outRe[1][1] = Sqrt32( L_max( 0, sqrtVal_fx ), &exp ); + move32(); + q_re3 = sub( 31, exp ); + + *q_out = s_min( s_min( q_re1, q_re2 ), s_min( q_re3, q_im ) ); + outRe[0][0] = L_shr( outRe[0][0], sub( q_re1, *q_out ) ); + move32(); + outRe[1][0] = L_shr( outRe[1][0], sub( q_re2, *q_out ) ); + move32(); + outIm[1][0] = L_shr( outIm[1][0], sub( q_im, *q_out ) ); + move32(); + outRe[1][1] = L_shr( outRe[1][1], sub( q_re3, *q_out ) ); move32(); - exp = ONE_DIV_EPSILON_EXP; - move16(); } ELSE { - temp = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, D_fx[0], &exp ); - exp = sub( exp, sub( Q30, q_D ) ); - } - div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp - move32(); - - // Sqrt(1) - div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 - exp1 = add( 0, 20 ); - - IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt - { - exp1 = sub( 31, q_D ); - div_fx[1] = ISqrt32( D_fx[1], &exp1 ); + exp = sub( 31, q_e ); + outRe[1][1] = Sqrt32( e2, &exp ); move32(); - } - q_div = sub( 31, s_max( exp, exp1 ) ); - - div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div - move32(); - div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div - move32(); - - // 1310720000 = 10,000.0f in Q17 - Word32 thresh = L_shl_sat( 1310720000, sub( q_div, Q17 ) ); // q_div - div_fx[0] = L_min( div_fx[0], thresh ); // q_div - div_fx[1] = L_min( div_fx[1], thresh ); // q_div + q_re1 = sub( 31, exp ); - matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); + // 4611686 = Q62 + IF( outRe[1][1] == 0 ) + { + // outRe[0][1] = BASOP_Util_Divide3232_Scale_newton( c_re, 4611686, &exp ); + Word32 tmp1 = 1953125005; /* 1/4611686 Q62 */ + exp = 9; - exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); - scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); - q_div = add( q_div, exp ); + outRe[0][1] = Mpy_32_32( tmp1, c_re ); + move32(); + q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) - { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + // outIm[0][1] = BASOP_Util_Divide3232_Scale_newton( -c_im, 4611686, &exp ); + outIm[0][1] = Mpy_32_32( tmp1, -c_im ); + move32(); + q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); + } + ELSE { - Word64 W_tmp; - - W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); - IF( W_tmp != 0 ) - { - Word16 hdrm = sub( W_norm( W_tmp ), 32 ); - tmpRe_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm ); - move32(); - hdrm_re[chA][chB] = add( add( q_temp, q_div ), hdrm ); - move16(); - } - ELSE { - tmpRe_fx[chA][chB] = 0; + // outRe[0][1] = BASOP_Util_Divide3232_Scale_newton( c_re, outRe[1][1], &exp ); + Word32 tmp1 = BASOP_Util_Divide3232_Scale_newton( 0x7FFFFFFF, outRe[1][1], &exp ); + outRe[0][1] = Mpy_32_32( tmp1, c_re ); move32(); - } + q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); - W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); - IF( W_tmp != 0 ) - { - Word16 hdrm = sub( W_norm( W_tmp ), 32 ); - move16(); - tmpIm_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm ); - move32(); - hdrm_im[chA][chB] = add( add( q_temp, q_div ), hdrm ); - move16(); - } - ELSE - { - tmpIm_fx[chA][chB] = 0; + // outIm[0][1] = BASOP_Util_Divide3232_Scale_newton( -c_im, outRe[1][1], &exp ); + outIm[0][1] = Mpy_32_32( tmp1, -c_im ); move32(); + q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); } } - } + if ( outRe[0][1] == 0 ) + { + q_re2 = Q31; + move16(); + } + if ( outIm[0][1] == 0 ) + { + q_im = Q31; + move16(); + } - minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); - q_temp = exp; - move16(); - minimum_s( hdrm_im[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, &exp ); - q_temp = s_min( q_temp, exp ); - q_temp = sub( q_temp, 1 ); + temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); + q_tmp = sub( add( q_c, q_c ), 31 ); - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) - { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + // 4611686 = 1e-12 in Q62 + IF( e2 == 0 ) { - tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], sub( hdrm_re[chA][chB], q_temp ) ); - tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], sub( hdrm_im[chA][chB], q_temp ) ); - move32(); - move32(); + temp = BASOP_Util_Divide3232_Scale_newton( temp, 4611686, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_newton( temp, e2, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) ); + } + if ( temp == 0 ) + { + q_tmp = Q31; + move16(); } - } - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, - 0 /*int Ascale*/, - 0 /*int Bscale*/, - Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + IF( LT_16( q_e, q_tmp ) ) + { + sqrtVal_fx = L_sub( e1, L_shr( temp, sub( q_tmp, q_e ) ) ); ////q_tmp + q_tmp = q_e; + move16(); + } + ELSE + { + sqrtVal_fx = L_sub( L_shr( e1, sub( q_e, q_tmp ) ), temp ); ////q_tmp + } + + exp = sub( 31, q_tmp ); + outRe[0][0] = Sqrt32( L_max( 0, sqrtVal_fx ), &exp ); + move32(); + q_re3 = sub( 31, exp ); - matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, Mre_fx, Mim_fx, q_M ); + *q_out = s_min( s_min( q_re1, q_re2 ), s_min( q_re3, q_im ) ); + move16(); + outRe[1][1] = L_shr( outRe[1][1], sub( q_re1, *q_out ) ); + move32(); + outRe[0][1] = L_shr( outRe[0][1], sub( q_re2, *q_out ) ); + move32(); + outIm[0][1] = L_shr( outIm[0][1], sub( q_im, *q_out ) ); + move32(); + outRe[0][0] = L_shr( outRe[0][0], sub( q_re3, *q_out ) ); + move32(); + } return; } -#endif /* FORM2x2MIXMAT_IMPROVEMENT */ - static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx, /*q_Ein*/ @@ -5631,7 +6099,7 @@ static void formulate2x2MixingMatrix_fx( } } #else - /* BINAURAL_CHANNEL == 2 */ + /* BINAURAL_CHANNEL == 2 */ FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 ) @@ -5732,7 +6200,7 @@ static void formulate2x2MixingMatrix_fx( return; } - +#endif static void getDirectPartGains_fx( const Word16 bin, @@ -5998,41 +6466,40 @@ static void hrtfShGetHrtf_fx( *------------------------------------------------------------------------*/ /*! r: Configured reqularization factor value */ - Word16 configure_reqularization_factor_fx( const IVAS_FORMAT ivas_format, /* i : IVAS format */ const Word32 ivas_total_brate /* i : IVAS total bitrate */ ) { Word16 reqularizationFactor; - reqularizationFactor = 16384; /* Default value */ /*Q14*/ + reqularizationFactor = 16384; /* Default value 1.0 Q14 */ move16(); IF( EQ_32( ivas_format, MASA_FORMAT ) ) { IF( GE_32( ivas_total_brate, IVAS_160k ) ) { - reqularizationFactor = 6553; /*Q14*/ + reqularizationFactor = 6553; /* 0.4 Q14 */ move16(); } ELSE IF( EQ_32( ivas_total_brate, IVAS_128k ) ) { - reqularizationFactor = 8192; /*Q14*/ + reqularizationFactor = 8192; /*0.5 Q14 */ move16(); } ELSE IF( EQ_32( ivas_total_brate, IVAS_96k ) ) { - reqularizationFactor = 9830; /*Q14*/ + reqularizationFactor = 9830; /* 0.6 *Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) ) { - reqularizationFactor = 13107; /*Q14*/ + reqularizationFactor = 13107; /* 0.8 Q14 */ move16(); } ELSE { - reqularizationFactor = 16384; /*Q14*/ + reqularizationFactor = 16384; /* 1.0 Q14 */ move16(); } } @@ -6041,27 +6508,27 @@ Word16 configure_reqularization_factor_fx( { IF( GE_32( ivas_total_brate, IVAS_96k ) ) { - reqularizationFactor = 6553; /*Q14*/ + reqularizationFactor = 6553; /* 0.4 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_80k ) ) { - reqularizationFactor = 8192; /*Q14*/ + reqularizationFactor = 8192; /*0.5 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) ) { - reqularizationFactor = 11468; /*Q14*/ + reqularizationFactor = 11468; /* 0.7 Q14 */ move16(); } ELSE IF( GE_32( ivas_total_brate, IVAS_48k ) ) { - reqularizationFactor = 13107; /*Q14*/ + reqularizationFactor = 13107; /* 0.8 Q14 */ move16(); } ELSE { - reqularizationFactor = 16384; /*Q14*/ + reqularizationFactor = 16384; /* 1.0 Q14 */ move16(); } } -- GitLab From 5254a6d27d13a5ec292f7ac25177bbdcec17b3bc Mon Sep 17 00:00:00 2001 From: mave2802 <59919483+mave2802@users.noreply.github.com> Date: Fri, 19 Dec 2025 21:31:34 +0100 Subject: [PATCH 3/5] removed NONBE_OPT_2193_EIG2X2 option and related code --- .../ivas_dirac_dec_binaural_functions_fx.c | 1144 ++++------------- 1 file changed, 266 insertions(+), 878 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 33eb96e4d..d4e6a6b28 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -119,17 +119,11 @@ static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaE static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ ); -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A ); -#endif static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#else -static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -#endif /*------------------------------------------------------------------------- @@ -2237,24 +2231,16 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */ -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( Mre_fx, Mim_fx, &q_M ); matrixScale_fx( CxRe_fx, CxIm_fx, &q_Cx ); -#endif + matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); -#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); matrixTransp2Mul_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, resultMtxRe_fx, resultMtxIm_fx, &q_res ); -#else - matrixTransp2Mul_fx( - tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, - 1 /*int Ascale*/, - 0 /*int Bscale*/, - resultMtxRe_fx, resultMtxIm_fx, &q_res ); -#endif /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent @@ -3497,347 +3483,214 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx( return; } -#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT -#ifdef NONBE_OPT_2193_EIG2X2 -static Word32 eig2x2_div_fx( Word32 num, Word32 den ); - -static Word32 eig2x2_div_fx( Word32 num, Word32 den ) +#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT +static Word32 Isqrt_newton_iter( Word32 val, Word32 isqrt_val, Word16 q_val, Word16 exp ) { - IF( EQ_32( den, 0x40000000 ) ) - { - return num; - } - return div_w_newton( num, den ); -} -#endif + Word32 temp; + + /* newton iteration for y = 1 / sqrt(x) + * y = y * 2 * (0.75 - 0.25 * x * y * y) */ + + temp = Mpy_32_32( isqrt_val, isqrt_val ); + temp = Mpy_32_32( val, temp ); + exp = sub( Q31, sub( q_val, add( exp, exp ) ) ); + + temp = L_sub( 0x60000000, L_shr( temp, sub( 2, exp ) ) ); + temp = Mpy_32_32( isqrt_val, temp ); + if ( val ) + isqrt_val = L_shl( temp, 1 ); + return ( isqrt_val ); +} static void eig2x2_fx( - const Word32 E1_fx, /*q_E*/ - const Word32 E2_fx, /*q_E*/ + Word32 E1_fx, /*q_E*/ + Word32 E2_fx, /*q_E*/ Word16 q_E, - const Word32 Cre_fx, /*q_C*/ - const Word32 Cim_fx, /*q_C*/ + Word32 Cre_fx, /*q_C*/ + Word32 Cim_fx, /*q_C*/ Word16 q_C, - Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ - Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ Word16 *q_U, Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { -#ifdef NONBE_OPT_2193_EIG2X2 - Word32 pm_fx, add_fx; - Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; - Word32 s0_fx, s1_fx, nval0_fx, nval1_fx; - Word64 crossSquare_fx, tmp3, tmp4; - Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; - Word16 nval0_q, nval1_q; - Word32 i01, i00, i11, i10; - Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; - Word16 eps_q = 63 - EPSILON_EXP; - move32(); - move16(); + Word16 ch; + Word32 _D_fx[BINAURAL_CHANNELS]; + Word32 add_fx, sub_fx, pm_fx, isqrt_fx, s_fx, s1_fx, s2_fx, normVal_fx, normVal_isqrt_fx; + Word64 crossSquare_fx, a_fx; + Word16 q_E2, q_C2, q_a, q_min, exp, q_pm, q_s, q_s2, q_norm; + Word32 i0, i1; set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); - c_re = L_shl( Cre_fx, exp ); - c_im = L_shl( Cim_fx, exp ); - q_c = add( q_C, exp ); + exp = get_min_scalefactor( E1_fx, E2_fx ); + E1_fx = L_shl( E1_fx, exp ); + E2_fx = L_shl( E2_fx, exp ); + q_E = add( q_E, exp ); + q_E2 = add( q_E, q_E ); - exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); - e1 = L_shl( E1_fx, exp ); - e2 = L_shl( E2_fx, exp ); - q_e = add( q_E, exp ); + exp = get_min_scalefactor( Cre_fx, Cim_fx ); + Cre_fx = L_shl( Cre_fx, exp ); + Cim_fx = L_shl( Cim_fx, exp ); + q_C = add( q_C, exp ); + q_C2 = add( q_C, q_C ); - // crossSquare_fx = (c_re * c_re) + (c_im * c_im) - // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx - // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - // add_fx = 0.5f * (e1 + e2) + add_fx = W_extract_l( W_shr( W_add( W_deposit32_l( E1_fx ), W_deposit32_l( E2_fx ) ), 1 ) ); /* 0.5 * ( E1 + E2 ) */ + sub_fx = L_sub( E1_fx, E2_fx ); - tmp1 = L_sub( e1, e2 ); - tmp3 = W_mult_32_32( tmp1, tmp1 ); - q_tmp1 = add( add( q_e, q_e ), 1 ); - if ( !tmp3 ) - { - q_tmp1 = 63; - move16(); - } + /* + * crossSquare = (Cre * Cre) + (Cim * Cim) + */ + crossSquare_fx = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); /* 2 * q_C */ - crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im ); - q_crossSquare = add( add( q_c, q_c ), 1 ); - if ( !crossSquare_fx ) - { - q_crossSquare = 63; - move16(); - } + /* + * a = (E1 + E2) * (E1 + E2) - 4.0f * ((E1 * E2) - crossSquare) + * a = (E1 - E2) ^ 2 + 4 * crossSquare + */ - tmp4 = crossSquare_fx; - move64(); - q_tmp2 = sub( q_crossSquare, 2 ); - if ( !tmp4 ) - { - q_tmp2 = 63; - move16(); - } + a_fx = W_mult0_32_32( sub_fx, sub_fx ); /* 2 * q_E */ - q_diff = sub( q_tmp1, q_tmp2 ); - q_tmp1 = s_min( q_tmp1, q_tmp2 ); - if ( q_diff > 0 ) - { - tmp3 = W_shr( tmp3, q_diff ); - } - if ( q_diff < 0 ) + q_C2 = add( q_C, q_C ); + q_a = sub( s_min( q_E2, q_C2 ), 2 ); + a_fx = W_add( W_shr( a_fx, sub( q_E2, q_a ) ), W_shr( crossSquare_fx, sub( q_C2, add( q_a, 2 ) ) ) ); + + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + q_a = add( q_a, exp ); + pm_fx = W_extract_h( a_fx ); + pm_fx = L_max( 0, pm_fx ); + q_pm = sub( q_a, Q32 ); + pm_fx = L_max( pm_fx, 0 ); + if ( !pm_fx ) { - tmp4 = W_shl( tmp4, q_diff ); + q_pm = Q31; + move16(); } - tmp3 = W_add( tmp3, tmp4 ); - q_diff = W_norm( tmp3 ); - tmp3 = W_shl( tmp3, q_diff ); - q_tmp1 = add( q_tmp1, q_diff ); - - // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) - exp = sub( 63, q_tmp1 ); - pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp ); - pm_fx = L_shr( pm_fx, 1 ); - q_tmp2 = sub( 31, exp ); - - // add_fx = 0.5 * (e1 + e2) - add_fx = L_shr( L_add( e1, e2 ), 1 ); - q_tmp1 = q_e; - move16(); - // D[0] = add + pm; - // D[1] = max( 0.0f, add - pm ); + /* + * add = 0.5f * ( E1 + E2 ) + * pm = 0.5f * sqrtf( max( 0.0f, a ) ) + * add = 0.5f * ( E1 + E2 ) + * D[0] = add + pm + * D[1] = max( 0.0f, add - pm ) + */ - q_diff = sub( q_tmp1, q_tmp2 ); + /* pm_fx is normalized */ + exp = sub( Q31, q_pm ); + isqrt_fx = Isqrt_lc1( pm_fx, &exp ); + isqrt_fx = Isqrt_newton_iter( pm_fx, isqrt_fx, q_pm, exp ); - tmp1 = add_fx; - move32(); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } + pm_fx = Mpy_32_32( pm_fx, isqrt_fx ); + q_pm = sub( q_pm, exp ); + exp = norm_l( pm_fx ); + pm_fx = L_shl( pm_fx, exp ); + q_pm = add( q_pm, exp ); + q_pm = add( q_pm, 1 ); /* 0.5 * sqrt( pm ) */ - tmp2 = pm_fx; - move32(); - if ( q_diff < 0 ) - { - tmp2 = L_shl( tmp2, q_diff ); - } + /* normalize */ + q_min = s_min( q_pm, q_E ); + q_min = sub( q_min, 1 ); + add_fx = L_shr( add_fx, sub( q_E, q_min ) ); + pm_fx = L_shr( pm_fx, sub( q_pm, q_min ) ); - D_fx[0] = L_add( tmp1, tmp2 ); - move32(); - D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); - move32(); - *q_D = s_min( q_tmp1, q_tmp2 ); - move32(); + D_fx[0] = L_add_sat( add_fx, pm_fx ); + D_fx[1] = L_max( 0, L_sub( add_fx, pm_fx ) ); - // Numeric case, when input is practically zeros - // if ( D_fx[0] < EPSILON_FX ) + *q_D = q_min; + move16(); - IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) + *q_U = Q30; + move16(); + /* Numeric case, when input is practically zeros */ + IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q30; - move32(); Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); return; } - // Numeric case, when input is near an identity matrix with a gain - tmp1 = Mpy_32_32( INV_1000_Q31, add_fx ); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } - - IF( LT_32( tmp2, tmp1 ) ) + /* Numeric case, when input is near an identity matrix with a gain */ + IF( LT_32( pm_fx, Mpy_32_32( INV_1000_Q31, add_fx ) ) ) { Ure_fx[0][0] = ONE_IN_Q30; - move32(); Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); return; } - // Eigenvectors - - q_diff = sub( q_e, *q_D ); - q_tmp1 = s_min( q_e, *q_D ); - - tmp1 = D_fx[0]; - move32(); - if ( q_diff > 0 ) - { - tmp1 = L_shr( tmp1, q_diff ); - } - - tmp2 = D_fx[1]; - move32(); - if ( q_diff > 0 ) - { - tmp2 = L_shr( tmp2, q_diff ); - } + /* Eigenvectors */ + q_s = s_min( *q_D, q_E ); + q_s2 = add( q_s, q_s ); - if ( q_diff < 0 ) - { - e1 = L_shl( e1, q_diff ); - } + _D_fx[0] = L_shr( D_fx[0], sub( *q_D, q_s ) ); + _D_fx[1] = L_shr( D_fx[1], sub( *q_D, q_s ) ); + E1_fx = L_shr( E1_fx, sub( q_E, q_s ) ); + E2_fx = L_shr( E2_fx, sub( q_E, q_s ) ); - if ( q_diff < 0 ) + FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - e2 = L_shl( e2, q_diff ); - } - - s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1 - tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 - s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1 - tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 + s1_fx = L_sub( _D_fx[ch], E1_fx ); + s2_fx = L_sub( _D_fx[ch], E2_fx ); - i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) - i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) - - if ( i01 ) - { - s0_fx = tmp1; - move32(); - } + i0 = LE_32( L_abs( s2_fx ), L_abs( s1_fx ) ); + i1 = L_xor( i0, 1 ); - if ( i11 ) - { - s1_fx = tmp2; + s_fx = s1_fx; move32(); - } - - // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); - - q_tmp2 = shl( q_tmp1, 1 ); - q_min = s_min( q_tmp2, q_crossSquare ); - q_min = s_min( q_min, eps_q ); - - q_diff = sub( q_tmp2, q_min ); - tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff ); - tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff ); - - q_diff = sub( q_crossSquare, q_min ); - crossSquare_fx = W_shr( crossSquare_fx, q_diff ); - tmp3 = W_add( tmp3, crossSquare_fx ); - tmp4 = W_add( tmp4, crossSquare_fx ); - - q_diff = sub( eps_q, q_min ); - eps_fx = W_shr( eps_fx, q_diff ); - tmp3 = W_add( tmp3, eps_fx ); - tmp4 = W_add( tmp4, eps_fx ); - - q_diff = W_norm( tmp3 ); - tmp3 = W_shl( tmp3, q_diff ); - nval0_q = add( q_min, q_diff ); - - q_diff = W_norm( tmp4 ); - tmp4 = W_shl( tmp4, q_diff ); - nval1_q = add( q_min, q_diff ); - - // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp ); - // exp = sub( exp, sub( 62, nval0_q ) ); - // - // is equivalent to: - // - // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) ); - // exp = sub( nval0_q, 61 ); - - nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) ); - exp = sub( nval0_q, 61 ); - nval0_fx = Sqrt32( nval0_fx, &exp ); - nval0_q = sub( 31, exp ); - - // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp ); - // exp = sub( exp, sub( 62, nval1_q ) ); - // - // is equivalent to: - // - // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) ); - // exp = sub( nval1_q, 61 ); - - nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) ); - exp = sub( nval1_q, 61 ); - nval1_fx = Sqrt32( nval1_fx, &exp ); - nval1_q = sub( 31, exp ); - - q_diff = sub( q_c, q_tmp1 ); - q_tmp1 = s_min( q_tmp1, q_c ); - - if ( q_diff > 0 ) - { - c_re = L_shr( c_re, q_diff ); - } - - if ( q_diff > 0 ) - { - c_im = L_shr( c_im, q_diff ); - } - - if ( q_diff < 0 ) - { - s0_fx = L_shl( s0_fx, q_diff ); - } - - if ( q_diff < 0 ) - { - s1_fx = L_shl( s1_fx, q_diff ); - } + if ( !i0 ) + { + s_fx = s2_fx; + move32(); + } - q_diff = sub( nval0_q, nval1_q ); - q_tmp2 = s_min( nval0_q, nval1_q ); + a_fx = W_mult0_32_32( s_fx, s_fx ); /* 2*q_s */ + q_min = s_min( q_s2, q_C2 ); + a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); + a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ - if ( q_diff > 0 ) - { - nval0_fx = L_shr( nval0_fx, q_diff ); - } + exp = W_norm( a_fx ); + a_fx = W_shl( a_fx, exp ); + normVal_fx = W_extract_h( a_fx ); + q_norm = sub( add( q_min, exp ), Q31 + 1 ); - if ( q_diff < 0 ) - { - nval1_fx = L_shl( nval1_fx, q_diff ); - } + /* normVal_fx is normalized */ + exp = sub( Q31, q_norm ); + normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); + normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); - *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); + q_norm = add( Q31, exp ); - i00 = L_sub( 1, i01 ); - i10 = L_sub( 1, i11 ); + a_fx = W_mult0_32_32( s_fx, normVal_isqrt_fx ); /* q_s + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ + Ure_fx[i0][ch] = W_extract_h( a_fx ); /* Q30 */ - c0_im = c_im; - move32(); - if ( i00 > 0 ) - { - c0_im = L_negate( c0_im ); - } + a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + Ure_fx[i1][ch] = W_extract_h( a_fx ); /* Q30 */ - c1_im = c_im; - move32(); - if ( i10 > 0 ) - { - c1_im = L_negate( c1_im ); + a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ + a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ + if ( i0 ) + a_fx = W_neg( a_fx ); + Uim_fx[i1][ch] = W_extract_h( a_fx ); /* Q30 */ } - - Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx ); - move32(); - Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx ); - move32(); - Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx ); - move32(); - - Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx ); - move32(); - Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx ); - move32(); - Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx ); - move32(); +} #else +static void eig2x2_fx( + const Word32 E1_fx, /*q_E*/ + const Word32 E2_fx, /*q_E*/ + Word16 q_E, + const Word32 Cre_fx, /*q_C*/ + const Word32 Cim_fx, /*q_C*/ + Word16 q_C, + Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/ + Word16 *q_U, + Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ + Word16 *q_D ) +{ Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; @@ -4204,7 +4057,6 @@ static void eig2x2_fx( *q_U = q_U_2; move16(); } -#endif return; } #endif @@ -4221,9 +4073,7 @@ static void matrixDiagMul_fx( { Word16 chA, chB; -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4233,35 +4083,22 @@ static void matrixDiagMul_fx( imOut_fx[chA][chB] = Mpy_32_32( imIn_fx[chA][chB], D_fx[chB] ); move32(); move32(); -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, reOut_fx[chA][chB] ); not_zero = L_or( not_zero, imOut_fx[chA][chB] ); -#endif } } *q_Out = sub( add( q_In, q_D ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_Out = Q31; move16(); } -#else - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - if ( L_and( is_zero_arr( reOut_fx[0], size ), is_zero_arr( imOut_fx[0], size ) ) ) - { - *q_Out = Q31; - move16(); - } -#endif - return; } -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4275,7 +4112,6 @@ static void matrixScale_fx( *q_A = add( *q_A, shift ); move16(); } -#endif static void matrixMul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4288,29 +4124,8 @@ static void matrixMul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { -#ifdef OPT_2182_MATRIX_SCALE_OPS - Word16 chA, chB; -#else Word16 chA, chB; - Word16 min_q_shift1, min_q_shift2; - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - - min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); - min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); - - scale_sig32( Are_fx[0], size, min_q_shift1 ); - scale_sig32( Aim_fx[0], size, min_q_shift1 ); - scale_sig32( Bre_fx[0], size, min_q_shift2 ); - scale_sig32( Bim_fx[0], size, min_q_shift2 ); - - *q_A = add( *q_A, min_q_shift1 ); - *q_B = add( *q_B, min_q_shift2 ); - move16(); - move16(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4337,32 +4152,18 @@ static void matrixMul_fx( Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else -#ifdef OPT_2182_MATRIX_SCALE_OPS - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#endif - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif - return; } @@ -4381,25 +4182,16 @@ static void matrixTransp1Mul_fx( Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL Word64 tmp64; Word16 common_lsh, q; q = add( add( q_A, q_B ), 1 ); common_lsh = sub( 63, q ); move16(); -#else - Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; - Word64 tmp64_1, tmp64_2; - Word16 tmp16, q_common = 63; - move16(); -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] ); @@ -4413,82 +4205,32 @@ static void matrixTransp1Mul_fx( tmp_outIm_fx[chA][chB] = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) ); -#else - tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) - move64(); - tmp16 = W_norm( tmp_outRe_fx[chA][chB] ); - tmp_outRe_fx[chA][chB] = W_shl( tmp_outRe_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) ) - move64(); - q_tmp_outRe_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); - move16(); - q_common = s_min( q_tmp_outRe_fx[chA][chB], q_common ); - - - tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), Are_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] ), Aim_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) - tmp_outIm_fx[chA][chB] = W_sub( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) - move64(); - tmp16 = W_norm( tmp_outIm_fx[chA][chB] ); - tmp_outIm_fx[chA][chB] = W_shl( tmp_outIm_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) ) - move64(); - q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); - move16(); - q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common ); -#endif } } -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); -#else - outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) ) ); - move32(); - outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) ) ); - move32(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } -#ifdef OPT_2181_MATRIX_TRANSP_1_MUL *q_out = sub( add( q, common_lsh ), 32 ); move16(); -#else - *q_out = sub( q_common, 32 ); - move16(); -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else - Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif return; } -#ifdef OPT_2182_MATRIX_SCALE_OPS static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4499,50 +4241,10 @@ static void matrixTransp2Mul_fx( Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) -#else -static void matrixTransp2Mul_fx( - Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ - Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ - Word16 *q_A, - Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ - Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ - Word16 *q_B, - Word32 Ascale, - Word32 Bscale, - Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ - Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ - Word16 *q_out ) -#endif { -#ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#else - Word16 chA, chB; - Word16 min_q_shift; - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; - - IF( Ascale == 1 ) - { - min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); - scale_sig32( Are_fx[0], size, min_q_shift ); - scale_sig32( Aim_fx[0], size, min_q_shift ); - *q_A = add( *q_A, min_q_shift ); - move16(); - } - - IF( Bscale == 1 ) - { - min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); - scale_sig32( Bre_fx[0], size, min_q_shift ); - scale_sig32( Bim_fx[0], size, min_q_shift ); - *q_B = add( *q_B, min_q_shift ); - move16(); - } -#endif -#ifdef OPT_2185_MATRIX_OUT_SCALING Word32 not_zero = 0; -#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) @@ -4568,55 +4270,23 @@ static void matrixTransp2Mul_fx( Are_fx[chA][1], Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ -#ifdef OPT_2185_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); -#endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); -#ifdef OPT_2185_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } -#else -#ifdef OPT_2182_MATRIX_SCALE_OPS - Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; -#endif - if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) - { - *q_out = Q31; - move16(); - } -#endif return; } #ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT -static Word32 Isqrt_newton_iter( Word32 val, Word32 isqrt_val, Word16 q_val, Word16 exp ) -{ - Word32 temp; - - /* newton iteration for y = 1 / sqrt(x) - * y = y * 2 * (0.75 - 0.25 * x * y * y) */ - - temp = Mpy_32_32( isqrt_val, isqrt_val ); - temp = Mpy_32_32( val, temp ); - exp = sub( Q31, sub( q_val, add( exp, exp ) ) ); - - temp = L_sub( 0x60000000, L_shr( temp, sub( 2, exp ) ) ); - temp = Mpy_32_32( isqrt_val, temp ); - if ( val ) - isqrt_val = L_shl( temp, 1 ); - - return ( isqrt_val ); -} - static void chol2x2_fx( Word32 E1, /*q_E*/ Word32 E2, /*q_E*/ @@ -4628,22 +4298,15 @@ static void chol2x2_fx( Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */ Word16 *q_out ) { - Word16 chA, chB; - Word32 sqrtVal, iSqrtVal; + Word32 E_tmp, sqrtVal, iSqrtVal; Word16 q_sqrtVal, q_min; Word16 exp = 0; - Word16 q_out_0_0, q_out_0_1, q_out_1_0, q_out_1_1; + Word16 q_out_0_0, q_out_1_0, q_out_1_1; + Word32 i0, i1; + + set32_fx( (Word32 *) outRe, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); + set32_fx( (Word32 *) outIm, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) - { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) - { - outRe[chA][chB] = 0; - move32(); - outIm[chA][chB] = 0; - move32(); - } - } exp = get_min_scalefactor( E1, E2 ); E1 = L_shl( E1, exp ); E2 = L_shl( E2, exp ); @@ -4654,340 +4317,91 @@ static void chol2x2_fx( Cim = L_shl( Cim, exp ); q_C = add( q_C, exp ); - IF( GT_32( E1, E2 ) ) /* Perform Cholesky decomposition according to louder channel first */ - { - /* outRe[0][0] = sqrt(E1) */ - exp = sub( Q31, q_E ); - iSqrtVal = Isqrt_lc1( E1, &exp ); - iSqrtVal = Isqrt_newton_iter( E1, iSqrtVal, q_E, exp ); - outRe[0][0] = Mpy_32_32( E1, iSqrtVal ); - q_out_0_0 = sub( q_E, exp ); - - /* outRe[1][0] = Cre / (outRe[0][0]);*/ - q_out_1_0 = Q31; - move16(); - outRe[1][0] = Mpy_32_32( Cre, iSqrtVal ); - if ( Cre != 0 ) - q_out_1_0 = sub( q_C, exp ); - - /* outIm[1][0] = Cim / (outRe[0][0]); */ - move16(); - outIm[1][0] = Mpy_32_32( Cim, iSqrtVal ); - if ( Cim != 0 ) - q_out_1_0 = sub( q_C, exp ); - - /* outRe[1][1] = sqrt(max(0.0f, E2 - (Cre * Cre + Cim * Cim) / (E1))) */ - sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); + i0 = LE_32( E1, E2 ); + i1 = L_xor( i0, 1 ); - /* Special treatment for E1_norm = 0x40000000, Result is known: 2 * sqrtVal */ - IF( GT_32( E1, 0x40000000 ) ) - { - sqrtVal = div_w_newton( sqrtVal, E1 ); - } - q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E1_norm - 2 */ - q_min = s_min( q_E, q_sqrtVal ); - sqrtVal = L_sub( L_shl( E2, sub( q_min, q_E ) ), L_shl( sqrtVal, sub( q_min, q_sqrtVal ) ) ); - - sqrtVal = L_max( 0, sqrtVal ); - - if ( sqrtVal == 0 ) - { - q_min = Q31; - move16(); - } - - /* normalize sqrtVal */ - exp = norm_l( sqrtVal ); - sqrtVal = L_shl( sqrtVal, exp ); - q_min = add( q_min, exp ); - exp = sub( Q31, q_min ); - iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); - - outRe[1][1] = Mpy_32_32( sqrtVal, iSqrtVal ); - q_out_1_1 = sub( q_min, exp ); + /* Perform Cholesky decomposition according to louder channel first */ + E_tmp = E1; + move32(); - /* normalize q_out */ - *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_1_0 ); - *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ - move16(); - outRe[1][1] = L_shr( outRe[1][1], sub( q_out_1_1, *q_out ) ); - move32(); - outRe[1][0] = L_shr( outRe[1][0], sub( q_out_1_0, *q_out ) ); - move32(); - outIm[1][0] = L_shr( outIm[1][0], sub( q_out_1_0, *q_out ) ); - move32(); - outRe[0][0] = L_shr( outRe[0][0], sub( q_out_0_0, *q_out ) ); - move32(); - } - ELSE + if ( i0 ) { - /* outRe[1][1] = sqrt(E2) */ - exp = sub( Q31, q_E ); - iSqrtVal = Isqrt_lc1( E2, &exp ); - outRe[1][1] = Mpy_32_32( E2, iSqrtVal ); - q_out_1_1 = sub( q_E, exp ); - - /* outRe[0][1] = Cre / (outRe[1][1]);*/ - q_out_0_1 = Q31; - move16(); - outRe[0][1] = Mpy_32_32( Cre, iSqrtVal ); - if ( Cre != 0 ) - q_out_0_1 = sub( q_C, exp ); - - /* outIm[0][1] = Cim / (outRe[1][1]); */ - outIm[0][1] = L_negate( Mpy_32_32( Cim, iSqrtVal ) ); - if ( Cim != 0 ) - q_out_0_1 = sub( q_C, exp ); - - /* outRe[0][0] = sqrt(max(0.0f, E1 - (Cre * Cre + Cim * Cim) / (E2))) */ - sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); /* q_sqrtVal = 2 * q_C - 1 */ - - /* Special treatment for E2_norm = 0x40000000, Result is known: 2 * sqrtVal */ - IF( GT_32( E2, 0x40000000 ) ) - { - sqrtVal = div_w_newton( sqrtVal, E2 ); - } - q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E2_norm - 2 */ - q_min = s_min( q_E, q_sqrtVal ); - sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) ); - - sqrtVal = L_max( sqrtVal, 0 ); - - if ( sqrtVal == 0 ) - { - q_min = Q31; - move16(); - } - - /* normalize sqrtVal */ - exp = norm_l( sqrtVal ); - sqrtVal = L_shl( sqrtVal, exp ); - q_min = add( q_min, exp ); - exp = sub( Q31, q_min ); - iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); - - outRe[0][0] = Mpy_32_32( sqrtVal, iSqrtVal ); - q_out_0_0 = sub( q_min, exp ); - - /* normalize q_out */ - *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_0_1 ); - *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ - move16(); - outRe[1][1] = L_shr( outRe[1][1], sub( q_out_1_1, *q_out ) ); - move32(); - outRe[0][1] = L_shr( outRe[0][1], sub( q_out_0_1, *q_out ) ); - move32(); - outIm[0][1] = L_shr( outIm[0][1], sub( q_out_0_1, *q_out ) ); - move32(); - outRe[0][0] = L_shr( outRe[0][0], sub( q_out_0_0, *q_out ) ); + E1 = E2; move32(); } - return; -} - -static void eig2x2_fx( - Word32 E1_fx, /*q_E*/ - Word32 E2_fx, /*q_E*/ - Word16 q_E, - Word32 Cre_fx, /*q_C*/ - Word32 Cim_fx, /*q_C*/ - Word16 q_C, - Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ - Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */ - Word16 *q_U, - Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ - Word16 *q_D ) -{ - Word16 chA, chB, ch; - - Word32 add_fx, sub_fx, pm_fx, isqrt_fx, s1_fx, s2_fx, normVal_fx, normVal_isqrt_fx, temp_fx; - Word64 crossSquare_fx, a_fx; - Word16 q_E2, q_C2, q_a, q_min, exp, q_pm, q_s, q_s2, q_norm; - - FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + if ( i0 ) { - FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) - { - Ure_fx[chA][chB] = 0; - move32(); - Uim_fx[chA][chB] = 0; - move32(); - } + E2 = E_tmp; + move32(); } - exp = get_min_scalefactor( E1_fx, E2_fx ); - E1_fx = L_shl( E1_fx, exp ); - E2_fx = L_shl( E2_fx, exp ); - q_E = add( q_E, exp ); - q_E2 = add( q_E, q_E ); - - exp = get_min_scalefactor( Cre_fx, Cim_fx ); - Cre_fx = L_shl( Cre_fx, exp ); - Cim_fx = L_shl( Cim_fx, exp ); - q_C = add( q_C, exp ); - q_C2 = add( q_C, q_C ); - - add_fx = W_extract_l( W_shr( W_add( W_deposit32_l( E1_fx ), W_deposit32_l( E2_fx ) ), 1 ) ); /* 0.5 * ( E1 + E2 ) */ - sub_fx = L_sub( E1_fx, E2_fx ); - - /* - * crossSquare = (Cre * Cre) + (Cim * Cim) - */ - crossSquare_fx = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); /* 2 * q_C */ - - /* - * a = (E1 + E2) * (E1 + E2) - 4.0f * ((E1 * E2) - crossSquare) - * a = (E1 - E2) ^ 2 + 4 * crossSquare - */ - - a_fx = W_mult0_32_32( sub_fx, sub_fx ); /* 2 * q_E */ - - q_C2 = add( q_C, q_C ); - q_a = sub( s_min( q_E2, q_C2 ), 2 ); - a_fx = W_add( W_shr( a_fx, sub( q_E2, q_a ) ), W_shr( crossSquare_fx, sub( q_C2, add( q_a, 2 ) ) ) ); - - exp = W_norm( a_fx ); - a_fx = W_shl( a_fx, exp ); - q_a = add( q_a, exp ); - pm_fx = W_extract_h( a_fx ); - pm_fx = L_max( 0, pm_fx ); - q_pm = sub( q_a, Q31 + 1 ); + /* outRe[0][0] = sqrt(E1) */ + exp = sub( Q31, q_E ); + iSqrtVal = Isqrt_lc1( E1, &exp ); + iSqrtVal = Isqrt_newton_iter( E1, iSqrtVal, q_E, exp ); + outRe[i0][i0] = Mpy_32_32( E1, iSqrtVal ); + q_out_0_0 = sub( q_E, exp ); - /* - * add = 0.5f * ( E1 + E2 ) - * pm = 0.5f * sqrtf( max( 0.0f, a ) ) - * add = 0.5f * ( E1 + E2 ) - * D[0] = add + pm - * D[1] = max( 0.0f, add - pm ) - */ - - q_min = q_E; + /* outRe[1][0] = Cre / (outRe[0][0]);*/ + q_out_1_0 = Q31; move16(); + outRe[i1][i0] = Mpy_32_32( Cre, iSqrtVal ); + if ( Cre != 0 ) + q_out_1_0 = sub( q_C, exp ); - IF( GT_32( pm_fx, 0 ) ) - { - /* pm_fx is normalized */ - exp = sub( Q31, q_pm ); - isqrt_fx = Isqrt_lc1( pm_fx, &exp ); - isqrt_fx = Isqrt_newton_iter( pm_fx, isqrt_fx, q_pm, exp ); - - pm_fx = Mpy_32_32( pm_fx, isqrt_fx ); - q_pm = sub( q_pm, exp ); - q_pm = add( q_pm, 1 ); /* 0.5 * sqrt( pm ) */ - - /* normalize */ - q_min = s_min( q_pm, q_E ); - q_min = sub( q_min, 1 ); - add_fx = L_shr( add_fx, q_E - q_min ); - pm_fx = L_shr( pm_fx, q_pm - q_min ); - } - + /* outIm[1][0] = Cim / (outRe[0][0]); */ + move16(); + outIm[i1][i0] = Mpy_32_32( Cim, iSqrtVal ); + if ( i0 ) + outIm[i1][i0] = L_negate( outIm[i1][i0] ); + if ( Cim != 0 ) + q_out_1_0 = sub( q_C, exp ); - temp_fx = L_abs( L_sub( add_fx, pm_fx ) ); + /* outRe[i1][i1] = sqrt(max(0.0f, E2 - (Cre * Cre + Cim * Cim) / (E1))) */ + sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) ); - if ( LT_32( temp_fx, ONE_IN_Q7 ) ) + /* Special treatment for E1_norm = 0x40000000, Result is known: 2 * sqrtVal */ + IF( GT_32( E1, 0x40000000 ) ) { - pm_fx = add_fx; - move32(); + sqrtVal = div_w_newton( sqrtVal, E1 ); } + q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E1_norm - 2 */ + q_min = s_min( q_E, q_sqrtVal ); + sqrtVal = L_sub( L_shl( E2, sub( q_min, q_E ) ), L_shl( sqrtVal, sub( q_min, q_sqrtVal ) ) ); - D_fx[0] = L_add( add_fx, pm_fx ); - D_fx[1] = L_max( 0, L_sub( add_fx, pm_fx ) ); - - *q_D = q_min; - move16(); + sqrtVal = L_max( 0, sqrtVal ); - *q_U = Q30; - move16(); - /* Numeric case, when input is practically zeros */ - IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) + if ( sqrtVal == 0 ) { - Ure_fx[0][0] = ONE_IN_Q30; - Ure_fx[1][1] = ONE_IN_Q30; - return; - } - - /* Numeric case, when input is near an identity matrix with a gain */ - IF( LT_32( pm_fx, Mpy_32_32( INV_1000_Q31, add_fx ) ) ) - { - Ure_fx[0][0] = ONE_IN_Q30; - Ure_fx[1][1] = ONE_IN_Q30; - return; + q_min = Q31; + move16(); } - /* Eigenvectors */ - q_s = s_min( *q_D, q_E ); - q_s2 = add( q_s, q_s ); - FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) - { - s1_fx = L_sub( L_shr( D_fx[ch], *q_D - q_s ), L_shr( E1_fx, q_E - q_s ) ); - s2_fx = L_sub( L_shr( D_fx[ch], *q_D - q_s ), L_shr( E2_fx, q_E - q_s ) ); - - IF( L_abs( s2_fx ) > L_abs( s1_fx ) ) - { - a_fx = W_mult0_32_32( s2_fx, s2_fx ); /* 2*q_s */ - q_min = s_min( q_s2, q_C2 ); - a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); - a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ - - exp = W_norm( a_fx ); - a_fx = W_shl( a_fx, exp ); - normVal_fx = W_extract_h( a_fx ); - q_norm = sub( add( q_min, exp ), Q31 + 1 ); - - /* normVal_fx is normalized */ - exp = sub( Q31, q_norm ); - normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); - normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); - - q_norm = add( Q31, exp ); - - a_fx = W_mult0_32_32( s2_fx, normVal_isqrt_fx ); /* q_s + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ - Ure_fx[0][ch] = W_extract_h( a_fx ); /* Q30 */ - - a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ - Ure_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ - - a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ - Uim_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ - } - ELSE - { - a_fx = W_mult0_32_32( s1_fx, s1_fx ); // 2*q_s - q_min = s_min( q_s2, q_C2 ); - a_fx = W_shr( a_fx, sub( q_s2, q_min ) ); - a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */ - exp = W_norm( a_fx ); - a_fx = W_shl( a_fx, exp ); - normVal_fx = W_extract_h( a_fx ); - q_norm = sub( add( q_min, exp ), Q31 + 1 ); - - /* normVal_fx is normalized */ - exp = sub( Q31, q_norm ); - normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp ); - normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp ); - - q_norm = add( Q31, exp ); - - a_fx = W_mult0_32_32( s1_fx, normVal_isqrt_fx ); /* q_s + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_s ) ); /* Q62 */ - Ure_fx[1][ch] = W_extract_h( a_fx ); /* Q30 */ + /* normalize sqrtVal */ + exp = norm_l( sqrtVal ); + sqrtVal = L_shl( sqrtVal, exp ); + q_min = add( q_min, exp ); + exp = sub( Q31, q_min ); + iSqrtVal = Isqrt_lc1( sqrtVal, &exp ); - a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ - Ure_fx[0][ch] = W_extract_h( a_fx ); /* Q30 */ + outRe[i1][i1] = Mpy_32_32( sqrtVal, iSqrtVal ); + q_out_1_1 = sub( q_min, exp ); - a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */ - a_fx = W_shl( a_fx, sub( q_norm, q_C ) ); /* Q62 */ - Uim_fx[0][ch] = L_negate( W_extract_h( a_fx ) ); /* Q30 */ - } - } + /* normalize q_out */ + *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_1_0 ); + *q_out = sub( *q_out, 1 ); /* 1 bit headroom */ + move16(); + outRe[i1][i1] = L_shr( outRe[i1][i1], sub( q_out_1_1, *q_out ) ); + move32(); + outRe[i1][i0] = L_shr( outRe[i1][i0], sub( q_out_1_0, *q_out ) ); + move32(); + outIm[i1][i0] = L_shr( outIm[i1][i0], sub( q_out_1_0, *q_out ) ); + move32(); + outRe[i0][i0] = L_shr( outRe[i0][i0], sub( q_out_0_0, *q_out ) ); + move32(); + return; } - static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx, /*q_Ein*/ Word32 Ein2_fx, /*q_Ein*/ @@ -5036,6 +4450,8 @@ static void formulate2x2MixingMatrix_fx( Word32 denom; Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + Word32 i_regularizationFactor_fx; + Flag doRegularization; set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); @@ -5169,46 +4585,56 @@ static void formulate2x2MixingMatrix_fx( /* Regularize the diagonal Sx for matrix inversion Sx_fx[0] = max( Sx_fx[0], regularizationFactor * Sx_fx[1] ) Sx_fx[1] = max( Sx_fx[1], regularizationFactor * Sx_fx[0] ) - TBD: use the isqrts from above, multiplied by 1/ regularizationFactor */ - Sx_fx[0] = L_max( Sx_fx[0], L_shl_sat( Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ), 1 ) ); - Sx_fx[1] = L_max( Sx_fx[1], L_shl_sat( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); - move32(); - move32(); + Reuse calculated Sx_Inv_fx[0] and Sx_Inv_fx[1] in conjunction with + i_regularizationFactor to avoid unnecessary division later by + Sx_fx[0] and Sx_fx[1] to calculate nearest orthonormal matrix P - q_Sx_Inv = add( Q30, sub( Q31, q_Sx ) ); + This is equivalent to + Sx_Inv_fx[0] = min( Sx_Inv_fx[0], i_regularizationFactor * Sx_Inv_fx[1] ) + Sx_Inv_fx[1] = min( Sx_Inv_fx[1], i_regularizationFactor * Sx_Inv_fx[0] ) + */ - Sx_Inv_fx[0] = ONE_IN_Q31; - move32(); - q_Sx_Inv_0 = Q30; - move16(); + /* Note: since there are only a few regularization factors, can be replaced by table lookup */ + i_regularizationFactor_fx = L_shl( ONE_IN_Q30 / regularizationFactor_fx, Q15 - Q3 ); /* Q28 */ - exp = norm_l( Sx_fx[0] ); - Sx_fx[0] = L_shl( Sx_fx[0], exp ); - - /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ - IF( GT_32( Sx_fx[0], 0x40000000 ) ) + doRegularization = LT_32( Sx_fx[0], L_shl_sat( Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ), 1 ) ); + if ( doRegularization ) { - Sx_Inv_fx[0] = div_w_newton( ONE_IN_Q31, Sx_fx[0] ); + Sx_Inv_fx[0] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[1] ); + move32(); } - q_Sx_Inv_0 = sub( q_Sx_Inv, exp ); - - Sx_Inv_fx[1] = ONE_IN_Q31; - move32(); - q_Sx_Inv_1 = Q30; - move16(); - - exp = norm_l( Sx_fx[1] ); - Sx_fx[1] = L_shl( Sx_fx[1], exp ); + if ( doRegularization ) + { + q_Sx_Inv_0 = q_Sx_Inv_1; + move16(); + } + if ( doRegularization ) + q_Sx_Inv_0 = sub( q_Sx_Inv_0, Q3 ); - /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */ - IF( GT_32( Sx_fx[1], 0x40000000 ) ) + doRegularization = LT_32( Sx_fx[1], L_shl_sat( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); + if ( doRegularization ) { - Sx_Inv_fx[1] = div_w_newton( ONE_IN_Q31, Sx_fx[1] ); + Sx_Inv_fx[1] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[0] ); + move32(); } - q_Sx_Inv_1 = sub( q_Sx_Inv, exp ); + if ( doRegularization ) + { + q_Sx_Inv_1 = q_Sx_Inv_0; + move16(); + } + if ( doRegularization ) + q_Sx_Inv_1 = sub( q_Sx_Inv_1, Q3 ); /* normalization */ + exp = norm_l( Sx_Inv_fx[0] ); + Sx_Inv_fx[0] = L_shl( Sx_Inv_fx[0], exp ); + q_Sx_Inv_0 = add( q_Sx_Inv_0, exp ); + + exp = norm_l( Sx_Inv_fx[1] ); + Sx_Inv_fx[1] = L_shl( Sx_Inv_fx[1], exp ); + q_Sx_Inv_1 = add( q_Sx_Inv_1, exp ); + q_Sx_Inv = s_min( q_Sx_Inv_0, q_Sx_Inv_1 ); Sx_Inv_fx[0] = L_shr( Sx_Inv_fx[0], q_Sx_Inv_0 - q_Sx_Inv ); Sx_Inv_fx[1] = L_shr( Sx_Inv_fx[1], q_Sx_Inv_1 - q_Sx_Inv ); @@ -5278,9 +4704,7 @@ static void formulate2x2MixingMatrix_fx( } q_temp = sub( add( q_ky, q_GhatQ ), 31 ); -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); -#endif /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, @@ -5326,9 +4750,7 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = Isqrt_lc1( D_fx_norm, &exp ); q_div[1] = sub( Q31, exp ); } -#ifdef OPT_2182_MATRIX_SCALE_OPS - matrixScale_fx( Are_fx, Aim_fx, &q_A ); -#endif + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ tmpRe_fx, tmpIm_fx, &q_temp ); @@ -5368,17 +4790,10 @@ static void formulate2x2MixingMatrix_fx( } q_temp = sub( add( q_temp, q_div_min ), Q31 ); -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, /* tmpRe_fx, tmpIm_fx have 1 bit headroom and are normalized, no scaling neccessary */ Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, - 0 /*int Ascale*/, - 0 /*int Bscale*/, - Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#endif /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ @@ -5396,19 +4811,10 @@ static void formulate2x2MixingMatrix_fx( Pre_fx, Pim_fx, &q_P, /* Pre_fx, Pim_fx have 1 bit headroom and are normalized, no scaling neccessary */ tmpRe_fx, tmpIm_fx, &q_temp ); - -#ifdef OPT_2182_MATRIX_SCALE_OPS - matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */ Mre_fx, Mim_fx, q_M ); -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, - 1 /*int Ascale*/, - 0 /*int Bscale*/, - Mre_fx, Mim_fx, q_M ); -#endif return; } @@ -5911,10 +5317,9 @@ static void formulate2x2MixingMatrix_fx( q_temp = sub( add( q_ky, q_GhatQ ), 31 ); /* A = Ky' * G_hat * Q * Kx (see publication) */ -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); matrixScale_fx( Kxre_fx, Kxim_fx, &q_Kx ); -#endif + matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx @@ -5972,10 +5377,9 @@ static void formulate2x2MixingMatrix_fx( div_fx[0] = L_min( div_fx[0], thresh ); // q_div div_fx[1] = L_min( div_fx[1], thresh ); // q_div -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( Are_fx, Aim_fx, &q_A ); matrixScale_fx( Ure_fx, Uim_fx, &q_U ); -#endif + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); @@ -6039,17 +5443,10 @@ static void formulate2x2MixingMatrix_fx( } } -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, - 0 /*int Ascale*/, - 0 /*int Bscale*/, - Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ -#endif /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) @@ -6099,7 +5496,7 @@ static void formulate2x2MixingMatrix_fx( } } #else - /* BINAURAL_CHANNEL == 2 */ + /* BINAURAL_CHANNEL == 2 */ FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 ) @@ -6180,23 +5577,14 @@ static void formulate2x2MixingMatrix_fx( } } -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( KyRe_fx, KyIm_fx, &q_ky ); matrixScale_fx( Pre_fx, Pim_fx, &q_P ); -#endif matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); -#ifdef OPT_2182_MATRIX_SCALE_OPS matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M ); -#else - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, - 1 /*int Ascale*/, - 0 /*int Bscale*/, - Mre_fx, Mim_fx, q_M ); -#endif return; } -- GitLab From b60ccf5772a52883a98b1307fca4dacdc9d08acb Mon Sep 17 00:00:00 2001 From: mave2802 <59919483+mave2802@users.noreply.github.com> Date: Fri, 19 Dec 2025 21:38:56 +0100 Subject: [PATCH 4/5] fixed rebase conflicts --- lib_com/options.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 20004a4f3..6e298e847 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -133,11 +133,7 @@ /* #################### Start BASOP optimization switches ############################ */ -#define OPT_2181_MATRIX_TRANSP_1_MUL /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */ -#define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */ -#define OPT_2185_MATRIX_OUT_SCALING /* Dolby: Issue 2185, optimize matrix-mul output-format. */ #define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ -#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ #define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ #define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */ /* #################### End BASOP optimization switches ############################ */ -- GitLab From d67c9441e346c4f37307adc62256c44fbaeda9c1 Mon Sep 17 00:00:00 2001 From: mave2802 <59919483+mave2802@users.noreply.github.com> Date: Fri, 19 Dec 2025 21:42:37 +0100 Subject: [PATCH 5/5] clang format --- lib_com/options.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 6e298e847..2a0bf0299 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -133,9 +133,9 @@ /* #################### Start BASOP optimization switches ############################ */ -#define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ -#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ -#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */ +#define NONBE_OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ +#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ +#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */ /* #################### End BASOP optimization switches ############################ */ -- GitLab