diff --git a/lib_com/options.h b/lib_com/options.h
index 2c70dd14f900bcf3ae42a391332364aa639d8451..2a0bf02997ab0db51ad438871dfb58c85907ac84 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -77,6 +77,10 @@
 /* ################### Start MAINTENANCE switches ########################### */
 #define FIX_2255_ISAR_RENDER_POSES                           /* VA: issue 2255: fix missing check in isar_render_poses() */
 
+/* ################### Start BE switches ################################# */
+/* only BE switches wrt wrt. TS 26.251 V3.0 */
+
+
 /* ################### Start BE switches ################################# */
 /* only BE switches wrt wrt. TS 26.251 V3.0 */
 
@@ -122,19 +126,16 @@
 
 /* ##################### End NON-BE switches ########################### */
 
+
 /* ################## End MAINTENANCE switches ######################### */
 
 /* clang-format on */
 
 /* #################### Start BASOP optimization switches ############################ */
 
-#define OPT_2181_MATRIX_TRANSP_1_MUL        /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */
-#define OPT_2182_MATRIX_SCALE_OPS           /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */
-#define OPT_2185_MATRIX_OUT_SCALING         /* Dolby: Issue 2185, optimize matrix-mul output-format. */
-#define NONBE_OPT_2239_IVAS_FILTER_PROCESS  /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */
-#define NONBE_OPT_2193_EIG2X2               /* Dolby: Issue 2193, optimize eig2x2_fx. */
-#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */
-
+#define NONBE_OPT_2239_IVAS_FILTER_PROCESS    /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */
+#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST   /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */
+#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT /* Dolby: use optimized formulate2x2MixingMatrix() function and subfunctions */
 /* #################### End BASOP optimization switches ############################ */
 
 
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 0152bf465a270b99520b097404e72c8b69770fcb..d4e6a6b2828cf49cd7bb66c9f2ad7870321c8d7c 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -51,17 +51,19 @@
 static Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 #define STEREO_PREPROCESS_IIR_FACTOR_Q15 ( 29491 ) // 0.9f in Q15
-/* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */
-#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736  // 0.4f in Q30
-#define TWO_POINT_FIVE_IN_Q13      20480      // Q13
-#define ADAPT_HTPROTO_IIR_FAC_FX   26689      // Q15
-#define LOG_10_BASE_2_Q29          1783446528 // Q29
-#define TAN_30_FX                  17157      // Q15
-#define INV_TAN30_FX               28377      // Q14
-#define EPSILON_MANT               1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */
+                                                   /* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */
+#define ADAPT_HTPROTO_ROT_LIM_0_FX 429496736       // 0.4f in Q30
+#define TWO_POINT_FIVE_IN_Q13      20480           // Q13
+#define ADAPT_HTPROTO_IIR_FAC_FX   26689           // Q15
+#define LOG_10_BASE_2_Q29          1783446528      // Q29
+#define TAN_30_FX                  17157           // Q15
+#define INV_TAN30_FX               28377           // Q14
+#define EPSILON_MANT               1180591621      /* 1e-12 = 0.5497558*(2^-39) in Q70 */
 #define EPSILON_EXP                ( -39 )
 #define ONE_DIV_EPSILON_MANT       1953125000 /* 1e+12 = 0.9094947*(2^40) */
 #define ONE_DIV_EPSILON_EXP        ( 40 )
+#define TEN_THOUSAND_Q17           1310720000 /* 10000.0 in Q17 */
+#define ONE_DIV_100000000_Q57      1441147784 /* 1.0 / (10000.0 * 10000.0) in Q57 */
 
 #define SMALL_EIGENVALUE 50
 
@@ -117,17 +119,11 @@ static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaE
 
 static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ );
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
 static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A );
-#endif
 
 static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
 static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
-#else
-static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
-#endif
 
 
 /*-------------------------------------------------------------------------
@@ -1198,8 +1194,8 @@ static void ivas_dirac_dec_binaural_internal_fx(
     move16();
     FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
     {
-        scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11
-        scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) );
+        Scale_sig32( output_fx[ch], imult1616( nBins, hSpatParamRendCom->subframe_nbslots[subframe] ), sub( 11, q_out ) ); // Scaling to Q11
+        Scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) );
         st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11;
         move16();
     }
@@ -2235,24 +2231,16 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         }
 
         /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */
-#ifdef OPT_2182_MATRIX_SCALE_OPS
         matrixScale_fx( Mre_fx, Mim_fx, &q_M );
         matrixScale_fx( CxRe_fx, CxIm_fx, &q_Cx );
-#endif
+
         matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp );
-#ifdef OPT_2182_MATRIX_SCALE_OPS
+
         matrixScale_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp );
         matrixTransp2Mul_fx(
             tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp,
             Mre_fx, Mim_fx, &q_M,
             resultMtxRe_fx, resultMtxIm_fx, &q_res );
-#else
-        matrixTransp2Mul_fx(
-            tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M,
-            1 /*int Ascale*/,
-            0 /*int Bscale*/,
-            resultMtxRe_fx, resultMtxIm_fx, &q_res );
-#endif
 
         /* When below the frequency limit where decorrelation is applied, we inject the decorrelated
          * residual (or missing) signal component. The procedure is active when there are not enough independent
@@ -3495,345 +3483,214 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx(
     return;
 }
 
-#ifdef NONBE_OPT_2193_EIG2X2
-static Word32 eig2x2_div_fx( Word32 num, Word32 den );
-
-static Word32 eig2x2_div_fx( Word32 num, Word32 den )
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+static Word32 Isqrt_newton_iter( Word32 val, Word32 isqrt_val, Word16 q_val, Word16 exp )
 {
-    IF( EQ_32( den, 0x40000000 ) )
-    {
-        return num;
-    }
-    return div_w_newton( num, den );
+    Word32 temp;
+
+    /* newton iteration for y = 1 / sqrt(x)
+     * y = y * 2 * (0.75 - 0.25 * x * y * y) */
+
+    temp = Mpy_32_32( isqrt_val, isqrt_val );
+    temp = Mpy_32_32( val, temp );
+    exp = sub( Q31, sub( q_val, add( exp, exp ) ) );
+
+    temp = L_sub( 0x60000000, L_shr( temp, sub( 2, exp ) ) );
+    temp = Mpy_32_32( isqrt_val, temp );
+    if ( val )
+        isqrt_val = L_shl( temp, 1 );
+
+    return ( isqrt_val );
 }
-#endif
 
 static void eig2x2_fx(
-    const Word32 E1_fx, /*q_E*/
-    const Word32 E2_fx, /*q_E*/
+    Word32 E1_fx, /*q_E*/
+    Word32 E2_fx, /*q_E*/
     Word16 q_E,
-    const Word32 Cre_fx, /*q_C*/
-    const Word32 Cim_fx, /*q_C*/
+    Word32 Cre_fx, /*q_C*/
+    Word32 Cim_fx, /*q_C*/
     Word16 q_C,
-    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
-    Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
+    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */
+    Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U, 1 bit Headroom */
     Word16 *q_U,
     Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
     Word16 *q_D )
 {
-#ifdef NONBE_OPT_2193_EIG2X2
-    Word32 pm_fx, add_fx;
-    Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im;
-    Word32 s0_fx, s1_fx, nval0_fx, nval1_fx;
-    Word64 crossSquare_fx, tmp3, tmp4;
-    Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c;
-    Word16 nval0_q, nval1_q;
-    Word32 i01, i00, i11, i10;
-    Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32;
-    Word16 eps_q = 63 - EPSILON_EXP;
-    move32();
-    move16();
+    Word16 ch;
+    Word32 _D_fx[BINAURAL_CHANNELS];
+    Word32 add_fx, sub_fx, pm_fx, isqrt_fx, s_fx, s1_fx, s2_fx, normVal_fx, normVal_isqrt_fx;
+    Word64 crossSquare_fx, a_fx;
+    Word16 q_E2, q_C2, q_a, q_min, exp, q_pm, q_s, q_s2, q_norm;
+    Word32 i0, i1;
 
     set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
     set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
 
-    exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 );
-    c_re = L_shl( Cre_fx, exp );
-    c_im = L_shl( Cim_fx, exp );
-    q_c = add( q_C, exp );
+    exp = get_min_scalefactor( E1_fx, E2_fx );
+    E1_fx = L_shl( E1_fx, exp );
+    E2_fx = L_shl( E2_fx, exp );
+    q_E = add( q_E, exp );
+    q_E2 = add( q_E, q_E );
 
-    exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 );
-    e1 = L_shl( E1_fx, exp );
-    e2 = L_shl( E2_fx, exp );
-    q_e = add( q_E, exp );
+    exp = get_min_scalefactor( Cre_fx, Cim_fx );
+    Cre_fx = L_shl( Cre_fx, exp );
+    Cim_fx = L_shl( Cim_fx, exp );
+    q_C = add( q_C, exp );
+    q_C2 = add( q_C, q_C );
 
-    // crossSquare_fx = (c_re * c_re) + (c_im * c_im)
-    // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
-    // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
-    // add_fx = 0.5f * (e1 + e2)
+    add_fx = W_extract_l( W_shr( W_add( W_deposit32_l( E1_fx ), W_deposit32_l( E2_fx ) ), 1 ) ); /* 0.5 * ( E1 + E2 ) */
+    sub_fx = L_sub( E1_fx, E2_fx );
 
-    tmp1 = L_sub( e1, e2 );
-    tmp3 = W_mult_32_32( tmp1, tmp1 );
-    q_tmp1 = add( add( q_e, q_e ), 1 );
-    if ( !tmp3 )
-    {
-        q_tmp1 = 63;
-        move16();
-    }
+    /*
+     * crossSquare = (Cre * Cre) + (Cim * Cim)
+     */
+    crossSquare_fx = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); /* 2 * q_C */
 
-    crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im );
-    q_crossSquare = add( add( q_c, q_c ), 1 );
-    if ( !crossSquare_fx )
-    {
-        q_crossSquare = 63;
-        move16();
-    }
+    /*
+     * a = (E1 + E2) * (E1 + E2) - 4.0f * ((E1 * E2) - crossSquare)
+     * a = (E1 - E2) ^ 2 + 4 * crossSquare
+     */
 
-    tmp4 = crossSquare_fx;
-    move64();
-    q_tmp2 = sub( q_crossSquare, 2 );
-    if ( !tmp4 )
-    {
-        q_tmp2 = 63;
-        move16();
-    }
+    a_fx = W_mult0_32_32( sub_fx, sub_fx ); /* 2 * q_E */
 
-    q_diff = sub( q_tmp1, q_tmp2 );
-    q_tmp1 = s_min( q_tmp1, q_tmp2 );
-    if ( q_diff > 0 )
-    {
-        tmp3 = W_shr( tmp3, q_diff );
-    }
-    if ( q_diff < 0 )
+    q_C2 = add( q_C, q_C );
+    q_a = sub( s_min( q_E2, q_C2 ), 2 );
+    a_fx = W_add( W_shr( a_fx, sub( q_E2, q_a ) ), W_shr( crossSquare_fx, sub( q_C2, add( q_a, 2 ) ) ) );
+
+    exp = W_norm( a_fx );
+    a_fx = W_shl( a_fx, exp );
+    q_a = add( q_a, exp );
+    pm_fx = W_extract_h( a_fx );
+    pm_fx = L_max( 0, pm_fx );
+    q_pm = sub( q_a, Q32 );
+    pm_fx = L_max( pm_fx, 0 );
+    if ( !pm_fx )
     {
-        tmp4 = W_shl( tmp4, q_diff );
+        q_pm = Q31;
+        move16();
     }
-    tmp3 = W_add( tmp3, tmp4 );
-    q_diff = W_norm( tmp3 );
-    tmp3 = W_shl( tmp3, q_diff );
-    q_tmp1 = add( q_tmp1, q_diff );
-
-    // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
-    exp = sub( 63, q_tmp1 );
-    pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp );
-    pm_fx = L_shr( pm_fx, 1 );
-    q_tmp2 = sub( 31, exp );
-
-    // add_fx = 0.5 * (e1 + e2)
-    add_fx = L_shr( L_add( e1, e2 ), 1 );
-    q_tmp1 = q_e;
-    move16();
 
-    // D[0] = add + pm;
-    // D[1] = max( 0.0f, add - pm );
+    /*
+     *   add = 0.5f * ( E1 + E2 )
+     *   pm = 0.5f * sqrtf( max( 0.0f, a ) )
+     *   add = 0.5f * ( E1 + E2 )
+     *   D[0] = add + pm
+     *   D[1] = max( 0.0f, add - pm )
+     */
 
-    q_diff = sub( q_tmp1, q_tmp2 );
+    /* pm_fx is normalized */
+    exp = sub( Q31, q_pm );
+    isqrt_fx = Isqrt_lc1( pm_fx, &exp );
+    isqrt_fx = Isqrt_newton_iter( pm_fx, isqrt_fx, q_pm, exp );
 
-    tmp1 = add_fx;
-    move32();
-    if ( q_diff > 0 )
-    {
-        tmp1 = L_shr( tmp1, q_diff );
-    }
+    pm_fx = Mpy_32_32( pm_fx, isqrt_fx );
+    q_pm = sub( q_pm, exp );
+    exp = norm_l( pm_fx );
+    pm_fx = L_shl( pm_fx, exp );
+    q_pm = add( q_pm, exp );
+    q_pm = add( q_pm, 1 ); /* 0.5 * sqrt( pm ) */
 
-    tmp2 = pm_fx;
-    move32();
-    if ( q_diff < 0 )
-    {
-        tmp2 = L_shl( tmp2, q_diff );
-    }
+    /* normalize */
+    q_min = s_min( q_pm, q_E );
+    q_min = sub( q_min, 1 );
+    add_fx = L_shr( add_fx, sub( q_E, q_min ) );
+    pm_fx = L_shr( pm_fx, sub( q_pm, q_min ) );
 
-    D_fx[0] = L_add( tmp1, tmp2 );
-    move32();
-    D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 );
-    move32();
-    *q_D = s_min( q_tmp1, q_tmp2 );
-    move32();
+    D_fx[0] = L_add_sat( add_fx, pm_fx );
+    D_fx[1] = L_max( 0, L_sub( add_fx, pm_fx ) );
 
-    // Numeric case, when input is practically zeros
-    // if ( D_fx[0] < EPSILON_FX )
+    *q_D = q_min;
+    move16();
 
-    IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) )
+    *q_U = Q30;
+    move16();
+    /* Numeric case, when input is practically zeros */
+    IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
     {
         Ure_fx[0][0] = ONE_IN_Q30;
-        move32();
         Ure_fx[1][1] = ONE_IN_Q30;
-        move32();
-        *q_U = Q30;
-        move16();
         return;
     }
 
-    // Numeric case, when input is near an identity matrix with a gain
-    tmp1 = Mpy_32_32( INV_1000_Q31, add_fx );
-    if ( q_diff > 0 )
-    {
-        tmp1 = L_shr( tmp1, q_diff );
-    }
-
-    IF( LT_32( tmp2, tmp1 ) )
+    /* Numeric case, when input is near an identity matrix with a gain */
+    IF( LT_32( pm_fx, Mpy_32_32( INV_1000_Q31, add_fx ) ) )
     {
         Ure_fx[0][0] = ONE_IN_Q30;
-        move32();
         Ure_fx[1][1] = ONE_IN_Q30;
-        move32();
-        *q_U = Q30;
-        move16();
         return;
     }
 
-    // Eigenvectors
-
-    q_diff = sub( q_e, *q_D );
-    q_tmp1 = s_min( q_e, *q_D );
-
-    tmp1 = D_fx[0];
-    move32();
-    if ( q_diff > 0 )
-    {
-        tmp1 = L_shr( tmp1, q_diff );
-    }
-
-    tmp2 = D_fx[1];
-    move32();
-    if ( q_diff > 0 )
-    {
-        tmp2 = L_shr( tmp2, q_diff );
-    }
+    /* Eigenvectors */
+    q_s = s_min( *q_D, q_E );
+    q_s2 = add( q_s, q_s );
 
-    if ( q_diff < 0 )
-    {
-        e1 = L_shl( e1, q_diff );
-    }
+    _D_fx[0] = L_shr( D_fx[0], sub( *q_D, q_s ) );
+    _D_fx[1] = L_shr( D_fx[1], sub( *q_D, q_s ) );
+    E1_fx = L_shr( E1_fx, sub( q_E, q_s ) );
+    E2_fx = L_shr( E2_fx, sub( q_E, q_s ) );
 
-    if ( q_diff < 0 )
+    FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
     {
-        e2 = L_shl( e2, q_diff );
-    }
-
-    s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1
-    tmp1 = L_sub( tmp1, e2 );  // D_fx[0] - e2
-    s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1
-    tmp2 = L_sub( tmp2, e2 );  // D_fx[1] - e2
+        s1_fx = L_sub( _D_fx[ch], E1_fx );
+        s2_fx = L_sub( _D_fx[ch], E2_fx );
 
-    i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 )
-    i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 )
-
-    if ( i01 )
-    {
-        s0_fx = tmp1;
-        move32();
-    }
+        i0 = LE_32( L_abs( s2_fx ), L_abs( s1_fx ) );
+        i1 = L_xor( i0, 1 );
 
-    if ( i11 )
-    {
-        s1_fx = tmp2;
+        s_fx = s1_fx;
         move32();
-    }
-
-    // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) );
-
-    q_tmp2 = shl( q_tmp1, 1 );
-    q_min = s_min( q_tmp2, q_crossSquare );
-    q_min = s_min( q_min, eps_q );
-
-    q_diff = sub( q_tmp2, q_min );
-    tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff );
-    tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff );
-
-    q_diff = sub( q_crossSquare, q_min );
-    crossSquare_fx = W_shr( crossSquare_fx, q_diff );
-    tmp3 = W_add( tmp3, crossSquare_fx );
-    tmp4 = W_add( tmp4, crossSquare_fx );
-
-    q_diff = sub( eps_q, q_min );
-    eps_fx = W_shr( eps_fx, q_diff );
-    tmp3 = W_add( tmp3, eps_fx );
-    tmp4 = W_add( tmp4, eps_fx );
-
-    q_diff = W_norm( tmp3 );
-    tmp3 = W_shl( tmp3, q_diff );
-    nval0_q = add( q_min, q_diff );
-
-    q_diff = W_norm( tmp4 );
-    tmp4 = W_shl( tmp4, q_diff );
-    nval1_q = add( q_min, q_diff );
-
-    // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp );
-    // exp = sub( exp, sub( 62, nval0_q ) );
-    //
-    // is equivalent to:
-    //
-    // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) );
-    // exp = sub( nval0_q, 61 );
-
-    nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) );
-    exp = sub( nval0_q, 61 );
-    nval0_fx = Sqrt32( nval0_fx, &exp );
-    nval0_q = sub( 31, exp );
-
-    // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp );
-    // exp = sub( exp, sub( 62, nval1_q ) );
-    //
-    // is equivalent to:
-    //
-    // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) );
-    // exp = sub( nval1_q, 61 );
-
-    nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) );
-    exp = sub( nval1_q, 61 );
-    nval1_fx = Sqrt32( nval1_fx, &exp );
-    nval1_q = sub( 31, exp );
-
-    q_diff = sub( q_c, q_tmp1 );
-    q_tmp1 = s_min( q_tmp1, q_c );
-
-    if ( q_diff > 0 )
-    {
-        c_re = L_shr( c_re, q_diff );
-    }
-
-    if ( q_diff > 0 )
-    {
-        c_im = L_shr( c_im, q_diff );
-    }
-
-    if ( q_diff < 0 )
-    {
-        s0_fx = L_shl( s0_fx, q_diff );
-    }
-
-    if ( q_diff < 0 )
-    {
-        s1_fx = L_shl( s1_fx, q_diff );
-    }
+        if ( !i0 )
+        {
+            s_fx = s2_fx;
+            move32();
+        }
 
-    q_diff = sub( nval0_q, nval1_q );
-    q_tmp2 = s_min( nval0_q, nval1_q );
+        a_fx = W_mult0_32_32( s_fx, s_fx ); /* 2*q_s */
+        q_min = s_min( q_s2, q_C2 );
+        a_fx = W_shr( a_fx, sub( q_s2, q_min ) );
+        a_fx = W_add( a_fx, W_shr( crossSquare_fx, sub( q_C2, q_min ) ) ); /* add 1e-12 ommitted */
 
-    if ( q_diff > 0 )
-    {
-        nval0_fx = L_shr( nval0_fx, q_diff );
-    }
+        exp = W_norm( a_fx );
+        a_fx = W_shl( a_fx, exp );
+        normVal_fx = W_extract_h( a_fx );
+        q_norm = sub( add( q_min, exp ), Q31 + 1 );
 
-    if ( q_diff < 0 )
-    {
-        nval1_fx = L_shl( nval1_fx, q_diff );
-    }
+        /* normVal_fx is normalized */
+        exp = sub( Q31, q_norm );
+        normVal_isqrt_fx = Isqrt_lc1( normVal_fx, &exp );
+        normVal_isqrt_fx = Isqrt_newton_iter( normVal_fx, normVal_isqrt_fx, q_norm, exp );
 
-    *q_U = sub( add( q_tmp1, q_tmp2 ), 31 );
+        q_norm = add( Q31, exp );
 
-    i00 = L_sub( 1, i01 );
-    i10 = L_sub( 1, i11 );
+        a_fx = W_mult0_32_32( s_fx, normVal_isqrt_fx ); /* q_s + q_norm */
+        a_fx = W_shl( a_fx, sub( q_norm, q_s ) );       /* Q62 */
+        Ure_fx[i0][ch] = W_extract_h( a_fx );           /* Q30 */
 
-    c0_im = c_im;
-    move32();
-    if ( i00 > 0 )
-    {
-        c0_im = L_negate( c0_im );
-    }
+        a_fx = W_mult0_32_32( Cre_fx, normVal_isqrt_fx ); /* q_C + q_norm */
+        a_fx = W_shl( a_fx, sub( q_norm, q_C ) );         /* Q62 */
+        Ure_fx[i1][ch] = W_extract_h( a_fx );             /* Q30 */
 
-    c1_im = c_im;
-    move32();
-    if ( i10 > 0 )
-    {
-        c1_im = L_negate( c1_im );
+        a_fx = W_mult0_32_32( Cim_fx, normVal_isqrt_fx ); /* q_C + q_norm */
+        a_fx = W_shl( a_fx, sub( q_norm, q_C ) );         /* Q62 */
+        if ( i0 )
+            a_fx = W_neg( a_fx );
+        Uim_fx[i1][ch] = W_extract_h( a_fx ); /* Q30 */
     }
-
-    Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx );
-    move32();
-    Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx );
-    move32();
-    Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx );
-    move32();
-
-    Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx );
-    move32();
-    Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx );
-    move32();
-    Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx );
-    move32();
+}
 #else
+static void eig2x2_fx(
+    const Word32 E1_fx, /*q_E*/
+    const Word32 E2_fx, /*q_E*/
+    Word16 q_E,
+    const Word32 Cre_fx, /*q_C*/
+    const Word32 Cim_fx, /*q_C*/
+    Word16 q_C,
+    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
+    Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
+    Word16 *q_U,
+    Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
+    Word16 *q_D )
+{
     Word16 chA, chB, ch;
     Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx;
     Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im;
@@ -3872,15 +3729,15 @@ static void eig2x2_fx(
     q_e = add( q_E, exp );
 
     /*crossSquare_fx = (c_re * c_re) + (c_im * c_im)
-    a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
-    pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
-    add_fx = 0.5f * (e1 + e2)*/
+  a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
+  pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
+  add_fx = 0.5f * (e1 + e2)*/
 
     IF( L_and( c_re == 0, c_im == 0 ) )
     {
         /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
-        a_fx = (E1 - E2)^2
-        pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
+    a_fx = (E1 - E2)^2
+    pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
         crossSquare_fx = 0;
         move32();
         q_crossSquare = Q31;
@@ -3896,7 +3753,7 @@ static void eig2x2_fx(
         IF( EQ_32( e1, e2 ) )
         {
             /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx
-            pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) =  sqrt(0, crossSquare_fx)*/
+        pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) =  sqrt(0, crossSquare_fx)*/
             test();
             test();
             IF( EQ_32( c_re, 0 ) || LT_32( L_abs( c_re ), ONE_IN_Q15 ) )
@@ -3923,8 +3780,8 @@ static void eig2x2_fx(
         ELSE
         {
             /* if e1, e2 >> c_re, c_im then (e1 - e2)^2 ~ (e1 - e2)^2 + 4 * crossSquare_fx
-               a_fx = (E1 - E2)^2
-               pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
+     a_fx = (E1 - E2)^2
+     pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
 
             IF( GT_16( sub( q_c, q_e ), Q15 ) )
             {
@@ -4200,9 +4057,9 @@ static void eig2x2_fx(
         *q_U = q_U_2;
         move16();
     }
-#endif
     return;
 }
+#endif
 
 static void matrixDiagMul_fx(
     Word32 reIn_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_In*/
@@ -4216,9 +4073,7 @@ static void matrixDiagMul_fx(
 {
     Word16 chA, chB;
 
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     Word32 not_zero = 0;
-#endif
 
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
@@ -4228,35 +4083,22 @@ static void matrixDiagMul_fx(
             imOut_fx[chA][chB] = Mpy_32_32( imIn_fx[chA][chB], D_fx[chB] );
             move32();
             move32();
-#ifdef OPT_2185_MATRIX_OUT_SCALING
             not_zero = L_or( not_zero, reOut_fx[chA][chB] );
             not_zero = L_or( not_zero, imOut_fx[chA][chB] );
-#endif
         }
     }
 
     *q_Out = sub( add( q_In, q_D ), 31 );
     move16();
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     if ( !not_zero )
     {
         *q_Out = Q31;
         move16();
     }
-#else
-    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-    if ( L_and( is_zero_arr( reOut_fx[0], size ), is_zero_arr( imOut_fx[0], size ) ) )
-    {
-        *q_Out = Q31;
-        move16();
-    }
-#endif
-
 
     return;
 }
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
 static void matrixScale_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
     Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4270,7 +4112,6 @@ static void matrixScale_fx(
     *q_A = add( *q_A, shift );
     move16();
 }
-#endif
 
 static void matrixMul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4283,29 +4124,8 @@ static void matrixMul_fx(
     Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
     Word16 *q_out )
 {
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     Word16 chA, chB;
-#else
-    Word16 chA, chB;
-    Word16 min_q_shift1, min_q_shift2;
-    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-
-    min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
-    min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 );
-
-    scale_sig32( Are_fx[0], size, min_q_shift1 );
-    scale_sig32( Aim_fx[0], size, min_q_shift1 );
-    scale_sig32( Bre_fx[0], size, min_q_shift2 );
-    scale_sig32( Bim_fx[0], size, min_q_shift2 );
-
-    *q_A = add( *q_A, min_q_shift1 );
-    *q_B = add( *q_B, min_q_shift2 );
-    move16();
-    move16();
-#endif
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     Word32 not_zero = 0;
-#endif
 
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
@@ -4332,32 +4152,18 @@ static void matrixMul_fx(
                                              Are_fx[chA][1], Bim_fx[1][chB] );
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
-#ifdef OPT_2185_MATRIX_OUT_SCALING
             not_zero = L_or( not_zero, outRe_fx[chA][chB] );
             not_zero = L_or( not_zero, outIm_fx[chA][chB] );
-#endif
         }
     }
 
     *q_out = sub( add( *q_A, *q_B ), 31 );
     move16();
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     if ( !not_zero )
     {
         *q_out = Q31;
         move16();
     }
-#else
-#ifdef OPT_2182_MATRIX_SCALE_OPS
-    Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
-#endif
-    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
-    {
-        *q_out = Q31;
-        move16();
-    }
-#endif
-
     return;
 }
 
@@ -4376,25 +4182,16 @@ static void matrixTransp1Mul_fx(
 
     Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
     Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-#ifdef OPT_2181_MATRIX_TRANSP_1_MUL
     Word64 tmp64;
     Word16 common_lsh, q;
     q = add( add( q_A, q_B ), 1 );
     common_lsh = sub( 63, q );
     move16();
-#else
-    Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word64 tmp64_1, tmp64_2;
-    Word16 tmp16, q_common = 63;
-    move16();
-#endif
 
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
-#ifdef OPT_2181_MATRIX_TRANSP_1_MUL
             tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] );
             tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] );
             tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] );
@@ -4408,93 +4205,32 @@ static void matrixTransp1Mul_fx(
             tmp_outIm_fx[chA][chB] = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] );
             move64();
             common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) );
-#else
-            tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
-            tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
-            tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 );                                                      // Q: add( add( q_A, q_B ), 1 )
-            move64();
-            tmp16 = W_norm( tmp_outRe_fx[chA][chB] );
-            tmp_outRe_fx[chA][chB] = W_shl( tmp_outRe_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) )
-            move64();
-            q_tmp_outRe_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) );
-            move16();
-            q_common = s_min( q_tmp_outRe_fx[chA][chB], q_common );
-
-
-            tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), Are_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
-            tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] ), Aim_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
-            tmp_outIm_fx[chA][chB] = W_sub( tmp64_1, tmp64_2 );                                                      // Q: add( add( q_A, q_B ), 1 )
-            move64();
-            tmp16 = W_norm( tmp_outIm_fx[chA][chB] );
-            tmp_outIm_fx[chA][chB] = W_shl( tmp_outIm_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) )
-            move64();
-            q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) );
-            move16();
-            q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common );
-#endif
         }
     }
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     Word32 not_zero = 0;
-#endif
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
-#ifdef OPT_2181_MATRIX_TRANSP_1_MUL
             outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) );
             move32();
             outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) );
             move32();
-#else
-            outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) ) );
-            move32();
-            outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) ) );
-            move32();
-#endif
-#ifdef OPT_2185_MATRIX_OUT_SCALING
             not_zero = L_or( not_zero, outRe_fx[chA][chB] );
             not_zero = L_or( not_zero, outIm_fx[chA][chB] );
-#endif
         }
     }
-#ifdef OPT_2181_MATRIX_TRANSP_1_MUL
     *q_out = sub( add( q, common_lsh ), 32 );
     move16();
-#else
-    *q_out = sub( q_common, 32 );
-    move16();
-#endif
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     if ( !not_zero )
     {
         *q_out = Q31;
         move16();
     }
-#else
-    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
-    {
-        *q_out = Q31;
-        move16();
-    }
-#endif
 
     return;
 }
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
-static void matrixTransp2Mul_fx(
-    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
-    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
-    Word16 *q_A,
-    Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
-    Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
-    Word16 *q_B,
-    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
-    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
-    Word16 *q_out )
-#else
 static void matrixTransp2Mul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
     Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4502,42 +4238,13 @@ static void matrixTransp2Mul_fx(
     Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
     Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
     Word16 *q_B,
-    Word32 Ascale,
-    Word32 Bscale,
     Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
     Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
     Word16 *q_out )
-#endif
 {
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     Word16 chA, chB;
     // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
-#else
-    Word16 chA, chB;
-    Word16 min_q_shift;
-    Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
-
-    IF( Ascale == 1 )
-    {
-        min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
-        scale_sig32( Are_fx[0], size, min_q_shift );
-        scale_sig32( Aim_fx[0], size, min_q_shift );
-        *q_A = add( *q_A, min_q_shift );
-        move16();
-    }
-
-    IF( Bscale == 1 )
-    {
-        min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 );
-        scale_sig32( Bre_fx[0], size, min_q_shift );
-        scale_sig32( Bim_fx[0], size, min_q_shift );
-        *q_B = add( *q_B, min_q_shift );
-        move16();
-    }
-#endif
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     Word32 not_zero = 0;
-#endif
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -4563,36 +4270,555 @@ static void matrixTransp2Mul_fx(
                                              Are_fx[chA][1], Bim_fx[chB][1] );
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
-#ifdef OPT_2185_MATRIX_OUT_SCALING
             not_zero = L_or( not_zero, outRe_fx[chA][chB] );
             not_zero = L_or( not_zero, outIm_fx[chA][chB] );
-#endif
         }
     }
 
     *q_out = sub( add( *q_A, *q_B ), 31 );
     move16();
-#ifdef OPT_2185_MATRIX_OUT_SCALING
     if ( !not_zero )
     {
         *q_out = Q31;
         move16();
     }
-#else
-#ifdef OPT_2182_MATRIX_SCALE_OPS
-    Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
-#endif
-    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
-    {
-        *q_out = Q31;
-        move16();
-    }
-#endif
 
     return;
 }
 
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+static void chol2x2_fx(
+    Word32 E1, /*q_E*/
+    Word32 E2, /*q_E*/
+    Word16 q_E,
+    Word32 Cre, /*q_C*/
+    Word32 Cim, /*q_C*/
+    Word16 q_C,
+    Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */
+    Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_out, 1 bit Headroom */
+    Word16 *q_out )
+{
+    Word32 E_tmp, sqrtVal, iSqrtVal;
+    Word16 q_sqrtVal, q_min;
+    Word16 exp = 0;
+    Word16 q_out_0_0, q_out_1_0, q_out_1_1;
+    Word32 i0, i1;
+
+    set32_fx( (Word32 *) outRe, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
+    set32_fx( (Word32 *) outIm, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
+
+    exp = get_min_scalefactor( E1, E2 );
+    E1 = L_shl( E1, exp );
+    E2 = L_shl( E2, exp );
+    q_E = add( q_E, exp );
+
+    exp = get_min_scalefactor( Cre, Cim );
+    Cre = L_shl( Cre, exp );
+    Cim = L_shl( Cim, exp );
+    q_C = add( q_C, exp );
+
+    i0 = LE_32( E1, E2 );
+    i1 = L_xor( i0, 1 );
+
+    /* Perform Cholesky decomposition according to louder channel first */
+    E_tmp = E1;
+    move32();
+
+    if ( i0 )
+    {
+        E1 = E2;
+        move32();
+    }
+    if ( i0 )
+    {
+        E2 = E_tmp;
+        move32();
+    }
+
+    /* outRe[0][0] = sqrt(E1) */
+    exp = sub( Q31, q_E );
+    iSqrtVal = Isqrt_lc1( E1, &exp );
+    iSqrtVal = Isqrt_newton_iter( E1, iSqrtVal, q_E, exp );
+    outRe[i0][i0] = Mpy_32_32( E1, iSqrtVal );
+    q_out_0_0 = sub( q_E, exp );
+
+    /* outRe[1][0] = Cre / (outRe[0][0]);*/
+    q_out_1_0 = Q31;
+    move16();
+    outRe[i1][i0] = Mpy_32_32( Cre, iSqrtVal );
+    if ( Cre != 0 )
+        q_out_1_0 = sub( q_C, exp );
+
+    /* outIm[1][0] = Cim / (outRe[0][0]); */
+    move16();
+    outIm[i1][i0] = Mpy_32_32( Cim, iSqrtVal );
+    if ( i0 )
+        outIm[i1][i0] = L_negate( outIm[i1][i0] );
+    if ( Cim != 0 )
+        q_out_1_0 = sub( q_C, exp );
+
+    /* outRe[i1][i1] = sqrt(max(0.0f, E2 - (Cre * Cre + Cim * Cim) / (E1))) */
+    sqrtVal = W_extract_h( W_add( W_mult0_32_32( Cre, Cre ), W_mult0_32_32( Cim, Cim ) ) );
+
+    /* Special treatment for E1_norm = 0x40000000, Result is known: 2 * sqrtVal */
+    IF( GT_32( E1, 0x40000000 ) )
+    {
+        sqrtVal = div_w_newton( sqrtVal, E1 );
+    }
+    q_sqrtVal = sub( add( q_C, q_C ), add( q_E, 2 ) ); /* q_sqrtVal = 2 *q_C - q_E1_norm - 2 */
+    q_min = s_min( q_E, q_sqrtVal );
+    sqrtVal = L_sub( L_shl( E2, sub( q_min, q_E ) ), L_shl( sqrtVal, sub( q_min, q_sqrtVal ) ) );
+
+    sqrtVal = L_max( 0, sqrtVal );
+
+    if ( sqrtVal == 0 )
+    {
+        q_min = Q31;
+        move16();
+    }
+
+    /* normalize sqrtVal */
+    exp = norm_l( sqrtVal );
+    sqrtVal = L_shl( sqrtVal, exp );
+    q_min = add( q_min, exp );
+    exp = sub( Q31, q_min );
+    iSqrtVal = Isqrt_lc1( sqrtVal, &exp );
+
+    outRe[i1][i1] = Mpy_32_32( sqrtVal, iSqrtVal );
+    q_out_1_1 = sub( q_min, exp );
+
+    /* normalize q_out */
+    *q_out = s_min( s_min( q_out_0_0, q_out_1_1 ), q_out_1_0 );
+    *q_out = sub( *q_out, 1 ); /* 1 bit headroom */
+    move16();
+    outRe[i1][i1] = L_shr( outRe[i1][i1], sub( q_out_1_1, *q_out ) );
+    move32();
+    outRe[i1][i0] = L_shr( outRe[i1][i0], sub( q_out_1_0, *q_out ) );
+    move32();
+    outIm[i1][i0] = L_shr( outIm[i1][i0], sub( q_out_1_0, *q_out ) );
+    move32();
+    outRe[i0][i0] = L_shr( outRe[i0][i0], sub( q_out_0_0, *q_out ) );
+    move32();
+    return;
+}
+
+static void formulate2x2MixingMatrix_fx(
+    Word32 Ein1_fx, /*q_Ein*/
+    Word32 Ein2_fx, /*q_Ein*/
+    Word16 q_Ein,
+    Word32 CinRe_fx, /*q_Cin*/
+    Word32 CinIm_fx, /*q_Cin*/
+    Word16 q_Cin,
+    Word32 Eout1_fx, /*q_Eout*/
+    Word32 Eout2_fx, /*q_Eout*/
+    Word16 q_Eout,
+    Word32 CoutRe_fx, /*q_Cout*/
+    Word32 CoutIm_fx, /*q_Cout*/
+    Word16 q_Cout,
+    Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS],   /* Q31 */
+    Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */
+    Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* q_M */
+    Word16 *q_M,
+    const Word16 regularizationFactor_fx /* Q14 */ )
+{
+    /*
+     This function implements a 2x2 solution for an optimized spatial audio rendering algorithm
+     Vilkamo, J., Bäckström, T. and Kuntz, A., 2013.
+     "Optimized covariance domain framework for time–frequency processing of spatial audio."
+     Journal of the Audio Engineering Society, 61(6), pp.403-411.
+
+     The result of the formulas below are the same as those in the publication, however, some
+     derivation details differ for as simple as possible 2x2 formulattion
+     */
+    Word16 chA, chB;
+    Word32 maxEneIn_fx, maxEneOut_fx, maxEne_fx, maxEneDiv_fx;
+    Word16 q_maxEne, q_maxEneDiv, exp;
+    Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Uxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Sx_fx[BINAURAL_CHANNELS], Sx_Inv_fx[BINAURAL_CHANNELS], Kxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Kxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 D_fx[BINAURAL_CHANNELS], D_fx_norm;
+    Word32 div_fx[BINAURAL_CHANNELS];
+    Word32 Ghat_fx[BINAURAL_CHANNELS];
+    Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 q_ky, q_Sx, q_Ux, q_Kx, q_A, q_U, q_D, q_P, q_Sx_Inv, q_Sx_0, q_Sx_1, q_Sx_Inv_0, q_Sx_Inv_1;
+    Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im, Cin_re, Cin_im;
+    Word16 q_ein, q_eout, q_cin, q_cout, q_Ghat, q_Ghat0, q_Ghat1, q_GhatQ, q_temp, q_div_min, q_div[2], norm_div[2];
+    Word32 denom;
+    Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 i_regularizationFactor_fx;
+    Flag doRegularization;
+    set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    q_ky = 0;
+    move16();
+    q_Sx = 0;
+    move16();
+    q_Ux = 0;
+    move16();
+    q_Kx = 0;
+    move16();
+
+    q_ein = q_Ein;
+    move16();
+    q_eout = q_Eout;
+    move16();
+    q_cin = q_Cin;
+    move16();
+    q_cout = q_Cout;
+    move16();
+
+    /* Normalize energy values */
+    maxEneIn_fx = L_max( Ein1_fx, Ein2_fx );
+    move16();
 
+    maxEneOut_fx = L_max( Eout1_fx, Eout2_fx );
+    move16();
+
+    IF( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
+    {
+        Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0;
+        move32();
+        move32();
+        move32();
+        move32();
+        Mim_fx[0][0] = Mim_fx[0][1] = Mim_fx[1][0] = Mim_fx[1][1] = 0;
+        move32();
+        move32();
+        move32();
+        move32();
+        *q_M = Q31;
+        move16();
+        return;
+    }
+
+    IF( LT_16( q_eout, q_ein ) )
+    {
+        maxEne_fx = L_max( maxEneOut_fx, L_shr( maxEneIn_fx, sub( q_ein, q_eout ) ) );
+        q_maxEne = q_eout;
+        move16();
+    }
+    ELSE
+    {
+        maxEne_fx = L_max( maxEneIn_fx, L_shr( maxEneOut_fx, sub( q_eout, q_ein ) ) );
+        q_maxEne = q_ein;
+        move16();
+    }
+
+    exp = norm_l( maxEne_fx );
+    maxEne_fx = L_shl( maxEne_fx, exp );
+
+    maxEneDiv_fx = ONE_IN_Q31;
+    move32();
+
+    /* Special treatment for den = 0x40000000, Result is known: 2 * ONE_IN_Q31 */
+    IF( GT_32( maxEne_fx, 0x40000000 ) )
+    {
+        maxEneDiv_fx = div_w_newton( ONE_IN_Q31, maxEne_fx );
+    }
+
+    q_maxEneDiv = sub( sub( Q30, exp ), q_maxEne );
+
+    E_in1 = Mpy_32_32( Ein1_fx, maxEneDiv_fx );
+    E_in2 = Mpy_32_32( Ein2_fx, maxEneDiv_fx );
+    q_ein = add( q_ein, q_maxEneDiv );
+
+    Cin_re = Mpy_32_32( CinRe_fx, maxEneDiv_fx );
+    Cin_im = Mpy_32_32( CinIm_fx, maxEneDiv_fx );
+    q_cin = add( q_cin, q_maxEneDiv );
+
+    E_out1 = Mpy_32_32( Eout1_fx, maxEneDiv_fx );
+    E_out2 = Mpy_32_32( Eout2_fx, maxEneDiv_fx );
+    q_eout = add( q_eout, q_maxEneDiv );
+
+    Cout_re = Mpy_32_32( CoutRe_fx, maxEneDiv_fx );
+    Cout_im = Mpy_32_32( CoutIm_fx, maxEneDiv_fx );
+    q_cout = add( q_cout, q_maxEneDiv );
+
+    /* Cholesky decomposition of target / output covariance matrix */
+    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
+    /* Eigendecomposition of input covariance matrix */
+    eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );
+    /* Eigendecomposition to Kx -- Ux Sx Ux' -> Kx Kx'*/
+
+    /* calculate sqrt(Sx_fx[0]) and 1.0 / sqrt(Sx_fx[0]) */
+    exp = Q31;
+    move16();
+    if ( Sx_fx[0] )
+        exp = norm_l( Sx_fx[0] );
+    Sx_fx[0] = L_shl( Sx_fx[0], exp );
+    q_Sx_0 = add( q_Sx, exp );
+    exp = sub( Q31, q_Sx_0 );
+    Sx_Inv_fx[0] = Isqrt_lc1( Sx_fx[0], &exp );
+    Sx_Inv_fx[0] = Isqrt_newton_iter( Sx_fx[0], Sx_Inv_fx[0], q_Sx_0, exp );
+    q_Sx_Inv_0 = sub( Q31, exp );
+    Sx_fx[0] = Mpy_32_32( Sx_fx[0], Sx_Inv_fx[0] );
+    q_Sx_0 = sub( q_Sx_0, exp );
+
+    /* calculate sqrt(Sx_fx[1]) and 1.0 / sqrt(Sx_fx[1]) */
+    exp = Q31;
+    move16();
+    if ( Sx_fx[1] )
+        exp = norm_l( Sx_fx[1] );
+    Sx_fx[1] = L_shl( Sx_fx[1], exp );
+    q_Sx_1 = add( q_Sx, exp );
+    exp = sub( Q31, q_Sx_1 );
+    Sx_Inv_fx[1] = Isqrt_lc1( Sx_fx[1], &exp );
+    Sx_Inv_fx[1] = Isqrt_newton_iter( Sx_fx[1], Sx_Inv_fx[1], q_Sx_1, exp );
+    q_Sx_Inv_1 = sub( Q31, exp );
+    Sx_fx[1] = Mpy_32_32( Sx_fx[1], Sx_Inv_fx[1] );
+    q_Sx_1 = sub( q_Sx_1, exp );
+
+    /* normalization */
+    q_Sx = s_min( q_Sx_0, q_Sx_1 );
+    Sx_fx[0] = L_shr( Sx_fx[0], q_Sx_0 - q_Sx );
+    Sx_fx[1] = L_shr( Sx_fx[1], q_Sx_1 - q_Sx );
+
+    matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx );
+
+    /* Regularize the diagonal Sx for matrix inversion
+       Sx_fx[0] = max( Sx_fx[0], regularizationFactor * Sx_fx[1] )
+       Sx_fx[1] = max( Sx_fx[1], regularizationFactor * Sx_fx[0] )
+
+       Reuse calculated Sx_Inv_fx[0] and Sx_Inv_fx[1] in conjunction with
+       i_regularizationFactor to avoid unnecessary division later by
+       Sx_fx[0] and Sx_fx[1] to calculate nearest orthonormal matrix P
+
+       This is equivalent to
+       Sx_Inv_fx[0] = min( Sx_Inv_fx[0], i_regularizationFactor * Sx_Inv_fx[1] )
+       Sx_Inv_fx[1] = min( Sx_Inv_fx[1], i_regularizationFactor * Sx_Inv_fx[0] )
+    */
+
+    /* Note: since there are only a few regularization factors, can be replaced by table lookup */
+    i_regularizationFactor_fx = L_shl( ONE_IN_Q30 / regularizationFactor_fx, Q15 - Q3 ); /* Q28 */
+
+    doRegularization = LT_32( Sx_fx[0], L_shl_sat( Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ), 1 ) );
+    if ( doRegularization )
+    {
+        Sx_Inv_fx[0] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[1] );
+        move32();
+    }
+    if ( doRegularization )
+    {
+        q_Sx_Inv_0 = q_Sx_Inv_1;
+        move16();
+    }
+    if ( doRegularization )
+        q_Sx_Inv_0 = sub( q_Sx_Inv_0, Q3 );
+
+    doRegularization = LT_32( Sx_fx[1], L_shl_sat( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) );
+    if ( doRegularization )
+    {
+        Sx_Inv_fx[1] = Mpy_32_32( i_regularizationFactor_fx, Sx_Inv_fx[0] );
+        move32();
+    }
+    if ( doRegularization )
+    {
+        q_Sx_Inv_1 = q_Sx_Inv_0;
+        move16();
+    }
+    if ( doRegularization )
+        q_Sx_Inv_1 = sub( q_Sx_Inv_1, Q3 );
+
+    /* normalization */
+    exp = norm_l( Sx_Inv_fx[0] );
+    Sx_Inv_fx[0] = L_shl( Sx_Inv_fx[0], exp );
+    q_Sx_Inv_0 = add( q_Sx_Inv_0, exp );
+
+    exp = norm_l( Sx_Inv_fx[1] );
+    Sx_Inv_fx[1] = L_shl( Sx_Inv_fx[1], exp );
+    q_Sx_Inv_1 = add( q_Sx_Inv_1, exp );
+
+    q_Sx_Inv = s_min( q_Sx_Inv_0, q_Sx_Inv_1 );
+    Sx_Inv_fx[0] = L_shr( Sx_Inv_fx[0], q_Sx_Inv_0 - q_Sx_Inv );
+    Sx_Inv_fx[1] = L_shr( Sx_Inv_fx[1], q_Sx_Inv_1 - q_Sx_Inv );
+
+    /* This is equivalent to the prototype signal energy normalization in the publication
+     * Ghat[0] = sqrt(Eout1 / max(Ein1, 0.001f * Ein2))
+     * Ghat[1] = sqrt(Eout2 / max(Ein2, 0.001f * Ein1)) */
+    denom = L_max( E_in1, Mpy_32_32( E_in2, INV_1000_Q31 ) );
+    exp = norm_l( denom );
+    denom = L_shl( denom, exp );
+
+    Ghat_fx[0] = E_out1;
+    move32();
+
+    /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out1 */
+    IF( GT_32( denom, 0x40000000 ) )
+    {
+        Ghat_fx[0] = div_w_newton( E_out1, denom );
+    }
+    exp = add( add( exp, 1 ), sub( q_ein, q_eout ) );
+    Ghat_fx[0] = Sqrt32( Ghat_fx[0], &exp );
+    q_Ghat0 = sub( Q31, exp );
+
+    denom = L_max( E_in2, Mpy_32_32( E_in1, INV_1000_Q31 ) );
+    exp = norm_l( denom );
+    denom = L_shl( denom, exp );
+
+    Ghat_fx[1] = E_out2;
+    move32();
+
+    /* Special treatment for denom = 0x40000000, Result is known: 2 * E_out2 */
+    IF( GT_32( denom, 0x40000000 ) )
+    {
+        Ghat_fx[1] = div_w_newton( E_out2, denom );
+    }
+    exp = add( add( exp, 1 ), sub( q_ein, q_eout ) );
+    Ghat_fx[1] = Sqrt32( Ghat_fx[1], &exp );
+    q_Ghat1 = sub( Q31, exp );
+
+    /* normalize q_Ghat */
+    q_Ghat = s_min( q_Ghat0, q_Ghat1 );
+
+    Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_Ghat0, q_Ghat ) );
+    move32();
+    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_Ghat1, q_Ghat ) );
+    move32();
+
+    /* Matrix multiplication, tmp = Ky' * G_hat * Q */
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] );
+        GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] );
+        move32();
+        move32();
+    }
+    q_GhatQ = sub( add( Q31, q_Ghat ), 31 );
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            tmpRe_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] );
+            tmpIm_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] );
+            move32();
+            move32();
+        }
+    }
+
+    q_temp = sub( add( q_ky, q_GhatQ ), 31 );
+    matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp );
+
+    /* A = Ky' * G_hat * Q * Kx (see publication) */
+    matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp,
+                  Kxre_fx, Kxim_fx, &q_Kx, /* Kxre_fx, Kxim_fx have 1 bit headroom and are normalized, no scaling neccessary */
+                  Are_fx, Aim_fx, &q_A );
+
+    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
+       For matrix A that is P = A(A'A)^0.5 */
+    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
+
+    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+
+    div_fx[0] = TEN_THOUSAND_Q17;
+    move32();
+    q_div[0] = Q17;
+    move16();
+
+    exp = norm_l( D_fx[0] );
+    D_fx_norm = L_shl( D_fx[0], exp );
+    exp = add( q_D, exp );
+
+    /* check if D_fx[0] > 1.0 / (10000.0 * 10000.0) */
+    IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) )
+    {
+        exp = sub( 31, exp );
+        div_fx[0] = Isqrt_lc1( D_fx_norm, &exp );
+        q_div[0] = sub( Q31, exp );
+    }
+
+    div_fx[1] = TEN_THOUSAND_Q17;
+    move32();
+    q_div[1] = Q17;
+    move16();
+
+    exp = norm_l( D_fx[1] );
+    D_fx_norm = L_shl( D_fx[1], exp );
+    exp = add( q_D, exp );
+
+    /* check if D_fx[1] > 1.0 / (10000.0 * 10000.0) */
+    IF( GT_32( D_fx_norm, 0 ) && ( ( LT_32( exp, 57 ) || ( EQ_32( exp, 57 ) && GT_32( D_fx_norm, ONE_DIV_100000000_Q57 ) ) ) ) )
+    {
+        exp = sub( Q31, exp );
+        div_fx[1] = Isqrt_lc1( D_fx_norm, &exp );
+        q_div[1] = sub( Q31, exp );
+    }
+
+    matrixMul_fx( Are_fx, Aim_fx, &q_A,
+                  Ure_fx, Uim_fx, &q_U, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */
+                  tmpRe_fx, tmpIm_fx, &q_temp );
+
+    norm_div[0] = Q31;
+    move16();
+    norm_div[1] = Q31;
+    move16();
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            tmpRe_fx[chA][chB] = Mpy_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
+            exp = norm_l( tmpRe_fx[chA][chB] );
+            if ( tmpRe_fx[chA][chB] )
+                norm_div[chB] = s_min( norm_div[chB], exp );
+
+            tmpIm_fx[chA][chB] = Mpy_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
+            exp = norm_l( tmpIm_fx[chA][chB] );
+            if ( tmpIm_fx[chA][chB] )
+                norm_div[chB] = s_min( norm_div[chB], exp );
+        }
+    }
+
+    /* normalize */
+    q_div_min = s_min( add( q_div[0], norm_div[0] ), add( q_div[1], norm_div[1] ) );
+    q_div_min = sub( q_div_min, 1 ); /* 1 bit headroom */
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], q_div[chB] - q_div_min );
+            tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], q_div[chB] - q_div_min );
+        }
+    }
+    q_temp = sub( add( q_temp, q_div_min ), Q31 );
+
+    matrixTransp2Mul_fx(
+        tmpRe_fx, tmpIm_fx, &q_temp, /* tmpRe_fx, tmpIm_fx have 1 bit headroom and are normalized, no scaling neccessary */
+        Ure_fx, Uim_fx, &q_U,        /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */
+        Pre_fx, Pim_fx, &q_P );      /* Nearest orthonormal matrix P to matrix A formulated */
+
+    /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
+
+    for ( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        for ( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], Sx_Inv_fx[chB] );
+            Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], Sx_Inv_fx[chB] );
+        }
+    }
+    q_P = sub( q_P, sub( Q31, q_Sx_Inv ) );
+
+    matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, /* KyRe_fx, KyIm_fx have 1 bit headroom and are normalized, no scaling neccessary */
+                  Pre_fx, Pim_fx, &q_P,    /* Pre_fx, Pim_fx have 1 bit headroom and are normalized, no scaling neccessary */
+                  tmpRe_fx, tmpIm_fx, &q_temp );
+
+    matrixTransp2Mul_fx(
+        tmpRe_fx, tmpIm_fx, &q_temp,
+        Uxre_fx, Uxim_fx, &q_Ux, /* Ure_fx, Uim_fx have 1 bit headroom and are normalized, no scaling neccessary */
+        Mre_fx, Mim_fx, q_M );
+    return;
+}
+
+#else
 static void chol2x2_fx(
     const Word32 E1, /*q_E*/
     const Word32 E2, /*q_E*/
@@ -4837,6 +5063,7 @@ static void chol2x2_fx(
 
     return;
 }
+
 static void formulate2x2MixingMatrix_fx(
     Word32 Ein1_fx, /*q_Ein*/
     Word32 Ein2_fx, /*q_Ein*/
@@ -5090,10 +5317,9 @@ static void formulate2x2MixingMatrix_fx(
     q_temp = sub( add( q_ky, q_GhatQ ), 31 );
 
     /* A = Ky' * G_hat * Q * Kx (see publication) */
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp );
     matrixScale_fx( Kxre_fx, Kxim_fx, &q_Kx );
-#endif
+
     matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
 
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
@@ -5151,10 +5377,9 @@ static void formulate2x2MixingMatrix_fx(
     div_fx[0] = L_min( div_fx[0], thresh );                     // q_div
     div_fx[1] = L_min( div_fx[1], thresh );                     // q_div
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     matrixScale_fx( Are_fx, Aim_fx, &q_A );
     matrixScale_fx( Ure_fx, Uim_fx, &q_U );
-#endif
+
     matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
 
     exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
@@ -5218,17 +5443,10 @@ static void formulate2x2MixingMatrix_fx(
         }
     }
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     matrixTransp2Mul_fx(
         tmpRe_fx, tmpIm_fx, &q_temp,
         Ure_fx, Uim_fx, &q_U,
         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-#else
-    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U,
-                         0 /*int Ascale*/,
-                         0 /*int Bscale*/,
-                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-#endif
 
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
 #if ( BINAURAL_CHANNELS != 2 )
@@ -5359,27 +5577,18 @@ static void formulate2x2MixingMatrix_fx(
         }
     }
 
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     matrixScale_fx( KyRe_fx, KyIm_fx, &q_ky );
     matrixScale_fx( Pre_fx, Pim_fx, &q_P );
-#endif
     matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp );
-#ifdef OPT_2182_MATRIX_SCALE_OPS
     matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp );
     matrixTransp2Mul_fx(
         tmpRe_fx, tmpIm_fx, &q_temp,
         Uxre_fx, Uxim_fx, &q_Ux,
         Mre_fx, Mim_fx, q_M );
-#else
-    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux,
-                         1 /*int Ascale*/,
-                         0 /*int Bscale*/,
-                         Mre_fx, Mim_fx, q_M );
-#endif
 
     return;
 }
-
+#endif
 
 static void getDirectPartGains_fx(
     const Word16 bin,
@@ -5645,41 +5854,40 @@ static void hrtfShGetHrtf_fx(
  *------------------------------------------------------------------------*/
 
 /*! r: Configured reqularization factor value */
-
 Word16 configure_reqularization_factor_fx(
     const IVAS_FORMAT ivas_format, /* i  : IVAS format          */
     const Word32 ivas_total_brate  /* i  : IVAS total bitrate  */
 )
 {
     Word16 reqularizationFactor;
-    reqularizationFactor = 16384; /* Default value */ /*Q14*/
+    reqularizationFactor = 16384; /* Default value 1.0 Q14 */
     move16();
 
     IF( EQ_32( ivas_format, MASA_FORMAT ) )
     {
         IF( GE_32( ivas_total_brate, IVAS_160k ) )
         {
-            reqularizationFactor = 6553; /*Q14*/
+            reqularizationFactor = 6553; /* 0.4 Q14 */
             move16();
         }
         ELSE IF( EQ_32( ivas_total_brate, IVAS_128k ) )
         {
-            reqularizationFactor = 8192; /*Q14*/
+            reqularizationFactor = 8192; /*0.5 Q14 */
             move16();
         }
         ELSE IF( EQ_32( ivas_total_brate, IVAS_96k ) )
         {
-            reqularizationFactor = 9830; /*Q14*/
+            reqularizationFactor = 9830; /* 0.6 *Q14 */
             move16();
         }
         ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) )
         {
-            reqularizationFactor = 13107; /*Q14*/
+            reqularizationFactor = 13107; /* 0.8 Q14 */
             move16();
         }
         ELSE
         {
-            reqularizationFactor = 16384; /*Q14*/
+            reqularizationFactor = 16384; /* 1.0 Q14 */
             move16();
         }
     }
@@ -5688,27 +5896,27 @@ Word16 configure_reqularization_factor_fx(
     {
         IF( GE_32( ivas_total_brate, IVAS_96k ) )
         {
-            reqularizationFactor = 6553; /*Q14*/
+            reqularizationFactor = 6553; /* 0.4 Q14 */
             move16();
         }
         ELSE IF( GE_32( ivas_total_brate, IVAS_80k ) )
         {
-            reqularizationFactor = 8192; /*Q14*/
+            reqularizationFactor = 8192; /*0.5 Q14 */
             move16();
         }
         ELSE IF( GE_32( ivas_total_brate, IVAS_64k ) )
         {
-            reqularizationFactor = 11468; /*Q14*/
+            reqularizationFactor = 11468; /* 0.7 Q14 */
             move16();
         }
         ELSE IF( GE_32( ivas_total_brate, IVAS_48k ) )
         {
-            reqularizationFactor = 13107; /*Q14*/
+            reqularizationFactor = 13107; /*  0.8 Q14 */
             move16();
         }
         ELSE
         {
-            reqularizationFactor = 16384; /*Q14*/
+            reqularizationFactor = 16384; /* 1.0 Q14 */
             move16();
         }
     }