From 772490d57cfe07396224396a111bcabfcc3f4bf0 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Sun, 15 Dec 2024 22:24:43 +0100
Subject: [PATCH 1/3] optimize ivas_dirac_dec_binaural_functions.c

---
 lib_com/options.h                            |   2 +
 lib_rend/ivas_dirac_dec_binaural_functions.c | 269 ++++++++++++++++++-
 2 files changed, 267 insertions(+), 4 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 2a218e87d..5b6855dbd 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -118,4 +118,6 @@
 #define FIX_ISSUE_1156                          /* Ittiam: Fix for Issue 1156: Encoder crash for Stereo at 32kbps in SWB_BWE_encoding_ivas_fx() */
 #define FIX_DISCLAIMER                          /* VA: Add disclaimer for external renderer + Add info about IVAS reference version (FLP issue 1225) */
 #define FIX_ISSUE_1167                          /* Ittiam: Fix for Issue 1167: Encoder crash for OSBA ISM3SBA1 at 13.2 and 16.4 kbps in gauss_L2_ivas_fx() */
+#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC   /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */
+#define FIX_1113_OPT_DIRAC_BIN_REND             /* FhG: Various optimizations to ivas_dirac_dec_binaural_functions.c */
 #endif
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 204ba1996..433da6188 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -31,9 +31,9 @@
 *******************************************************************************************************/
 
 #include <stdint.h>
+#include "options.h"
 #include <assert.h>
 #include <math.h>
-#include "options.h"
 #include "prot.h"
 #include "prot_fx.h"
 #include "ivas_prot.h"
@@ -72,9 +72,13 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 #define LOG_10_BASE_2_Q29          1783446528 // Q29
 #define TAN_30_FX                  17157      // Q15
 #define INV_TAN30_FX               28377      // Q14
-#define EPSILON_MANT               1180591621 /* 1e-12 in Q70 */
+#define EPSILON_MANT               1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */
 #define EPSILON_EXP                ( -39 )
-#define ADAPT_HTPROTO_ROT_LIM_1    0.8f
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */
+#define ONE_DIV_EPSILON_EXP  ( 40 )
+#endif
+#define ADAPT_HTPROTO_ROT_LIM_1 0.8f
 
 #define MAX_GAIN_CACHE_SIZE ( ( MASA_MAXIMUM_DIRECTIONS * 3 ) + MAX_NUM_OBJECTS ) /* == different calls to get gains */
 
@@ -3278,10 +3282,17 @@ static void eig2x2_fx(
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
 
+#if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
+#else
+            /* Note: This code part does not work yet, see pipeline issue for BASOP #1009  */
+            /* although the same code works at other places: mantissa and q_format is fine */
+            normVal_fx = ISqrt32( tmp3, &exp );
+            q_tmp2 = sub( 31, exp );
+#endif
 
             IF( LT_16( q_tmp1, q_c ) )
             {
@@ -3347,10 +3358,15 @@ static void eig2x2_fx(
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
 
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
+#else
+            normVal_fx = ISqrt32( tmp3, &exp_tmp3 );
+            q_tmp2 = sub( 31, exp_tmp3 );
+#endif
 
             IF( LT_16( q_tmp1, q_c ) )
             {
@@ -3405,9 +3421,13 @@ static void eig2x2_fx(
     }
 
     IF( q_U_1 != 0 )
-    *q_U = q_U_1;
+    {
+        *q_U = q_U_1;
+    }
     ELSE
+    {
         *q_U = q_U_2;
+    }
     move16();
 
     return;
@@ -3463,8 +3483,10 @@ static void matrixMul_fx(
     Word16 chA, chB;
     Word16 min_q_shift1, min_q_shift2;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
+#ifndef FIX_1113_OPT_DIRAC_BIN_REND
 #ifndef IVAS_ENH64_CADENCE_CHANGES
     Word32 tmp1, tmp2;
+#endif
 #endif
 
     min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
@@ -3493,6 +3515,18 @@ static void matrixMul_fx(
             move32();
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) );
             move32();
+#else
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+            outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ),
+                                                                     Are_fx[chA][1], Bre_fx[1][chB] ),
+                                                         Aim_fx[chA][0], Bim_fx[0][chB] ),
+                                             Aim_fx[chA][1], Bim_fx[1][chB] );
+            move32();
+            outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ),
+                                                                     Aim_fx[chA][1], Bre_fx[1][chB] ),
+                                                         Are_fx[chA][0], Bim_fx[0][chB] ),
+                                             Are_fx[chA][1], Bim_fx[1][chB] );
+            move32();
 #else
             test();
             test();
@@ -3592,6 +3626,7 @@ static void matrixMul_fx(
             }
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
             move32();
+#endif
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
     }
@@ -3620,12 +3655,26 @@ static void matrixTransp1Mul_fx(
 {
     Word16 chA, chB;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
+#ifndef FIX_1113_OPT_DIRAC_BIN_REND
     Word32 tmp1, tmp2;
+#endif
 
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+            outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ),
+                                                                     Are_fx[1][chA], Bre_fx[1][chB] ),
+                                                         Aim_fx[0][chA], Bim_fx[0][chB] ),
+                                             Aim_fx[1][chA], Bim_fx[1][chB] );
+            move32();
+            outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ),
+                                                                     Are_fx[1][chA], Bim_fx[1][chB] ),
+                                                         Aim_fx[0][chA], Bre_fx[0][chB] ),
+                                             Aim_fx[1][chA], Bre_fx[1][chB] );
+            move32();
+#else
             test();
             test();
             test();
@@ -3692,6 +3741,7 @@ static void matrixTransp1Mul_fx(
                 tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) );
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
             move32();
+#endif
         }
     }
     *q_out = sub( add( q_A, q_B ), 31 );
@@ -3720,8 +3770,10 @@ static void matrixTransp2Mul_fx(
     Word16 chA, chB;
     Word16 min_q_shift;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
+#ifndef FIX_1113_OPT_DIRAC_BIN_REND
 #ifndef IVAS_ENH64_CADENCE_CHANGES
     Word32 tmp1, tmp2;
+#endif
 #endif
 
     min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
@@ -3748,6 +3800,18 @@ static void matrixTransp2Mul_fx(
             move32();
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) );
             move32();
+#else
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+            outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ),
+                                                                     Are_fx[chA][1], Bre_fx[chB][1] ),
+                                                         Aim_fx[chA][0], Bim_fx[chB][0] ),
+                                             Aim_fx[chA][1], Bim_fx[chB][1] );
+            move32();
+            outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ),
+                                                                     Aim_fx[chA][1], Bre_fx[chB][1] ),
+                                                         Are_fx[chA][0], Bim_fx[chB][0] ),
+                                             Are_fx[chA][1], Bim_fx[chB][1] );
+            move32();
 #else
             test();
             test();
@@ -3816,6 +3880,7 @@ static void matrixTransp2Mul_fx(
                 tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
             move32();
+#endif
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
     }
@@ -3890,6 +3955,7 @@ static void chol2x2_fx(
         }
         ELSE
         {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp );
             move32();
             q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) );
@@ -3897,6 +3963,33 @@ static void chol2x2_fx(
             outIm[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_im, outRe[0][0], &exp );
             move32();
             q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) );
+#else
+            Word32 denom;
+            Word16 den_exp;
+            Word32 my_outRe, my_outIm;
+
+            /* Compute denom = 1.0 / outRe[0][0] */
+            denom = ISqrt32( outRe[0][0], &exp );
+            denom = Mpy_32_32( denom, denom );
+            den_exp = shl( exp, 1 );
+
+            /* Normalise c_re, c_im */
+            exp = norm_l( c_re );
+            my_outRe = L_shl( c_re, exp );
+            q_re2 = add( q_c, exp );
+            exp = norm_l( c_im );
+            my_outIm = L_shl( c_im, exp );
+            q_im = add( q_c, exp );
+
+            /* Multiply and store c_re*denom and c_im*denom */
+            outRe[1][0] = Mpy_32_32( denom, my_outRe );
+            move32();
+            q_re2 = sub( q_re2, den_exp );
+
+            outIm[1][0] = Mpy_32_32( denom, my_outIm );
+            move32();
+            q_im = sub( q_im, den_exp );
+#endif
         }
         if ( outRe[1][0] == 0 )
         {
@@ -3915,8 +4008,16 @@ static void chol2x2_fx(
         // 4611686 = Q62
         IF( e1 == 0 )
         {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
             q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+#else
+            Word16 norm = norm_l( temp );
+            temp = L_shl( temp, norm );
+            q_tmp = add( q_tmp, norm );
+            temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT );
+            q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP );
+#endif
         }
         ELSE
         {
@@ -4146,8 +4247,15 @@ static void formulate2x2MixingMatrix_fx(
     // 4611686 = Q62
     IF( maxEne_fx == 0 )
     {
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+        maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
+        move32();
+        q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
+        move16();
+#else
         maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62
         q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) );
+#endif
     }
     ELSE
     {
@@ -4207,8 +4315,24 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( temp == 0 )
     {
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+        IF( E_out1 == 0 )
+        {
+            Ghat_fx[0] = 0;
+            exp = -19;
+            move32();
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
+            exp = sub( exp, sub( q_eout, 62 ) );
+            Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        }
+#else
         BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
         exp = sub( exp, sub( q_eout, 62 ) );
+#endif
     }
     ELSE
     {
@@ -4216,16 +4340,36 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+#endif
     }
+#ifndef FIX_1113_OPT_DIRAC_BIN_REND
     Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+#endif
     move32();
 
     temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31
     temp = L_max( temp, E_in2 );        // q_ein
     IF( temp == 0 )
     {
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+        IF( E_out2 == 0 )
+        { /* We can set hard-coded results */
+            Ghat_fx[1] = 0;
+            exp1 = -19;
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
+            exp1 = sub( exp1, sub( q_eout, 62 ) );
+            Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+        }
+#else
         BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
         exp1 = sub( exp1, sub( q_eout, 62 ) );
+#endif
     }
     ELSE
     {
@@ -4233,8 +4377,13 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+#endif
     }
+#ifndef FIX_1113_OPT_DIRAC_BIN_REND
     Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+#endif
     move32();
 
     q_Ghat = sub( 31, s_max( exp, exp1 ) );
@@ -4283,8 +4432,13 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( D_fx[0] == 0 )
     {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
         exp = sub( exp, sub( Q30, 62 ) );
+#else
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        exp = ONE_DIV_EPSILON_EXP;
+#endif
     }
     ELSE
     {
@@ -4296,8 +4450,13 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( D_fx[1] == 0 )
     {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62
         exp1 = sub( exp1, sub( Q30, 62 ) );
+#else
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        exp1 = ONE_DIV_EPSILON_EXP;
+#endif
     }
     ELSE
     {
@@ -4400,25 +4559,61 @@ static void formulate2x2MixingMatrix_fx(
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
 
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
+#if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
             IF( Sx_fx[chB] == 0 )
             {
+#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+                Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT );
+                // q_Pre[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+
+
+                Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT );
+                // q_Pim[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pim[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+#else
                 Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62
                 q_Pre[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) );
                 Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62
                 q_Pim[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) );
+#endif
             }
             ELSE
             {
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+                Word16 Pre_shift, Pim_shift;
+#endif
                 temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
 
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
                 Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], temp, &exp );
                 q_Pre[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) );
                 Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp );
                 q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) );
+#else
+                temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
+                Pre_shift = norm_l( Pre_fx[chA][chB] );
+                Pim_shift = norm_l( Pim_fx[chA][chB] );
+                Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp );
+                Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp );
+                q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
+                q_Pre[chA][chB] = add( q_temp, Pre_shift );
+                q_Pim[chA][chB] = add( q_temp, Pim_shift );
+#endif
+            }
+            if ( Pre_fx[chA][chB] == 0 )
+            {
+                q_Pre[chA][chB] = 31;
+                move16();
+            }
+            if ( Pim_fx[chA][chB] == 0 )
+            {
+                q_Pim[chA][chB] = 31;
+                move16();
             }
             move32();
             move32();
@@ -4426,6 +4621,72 @@ static void formulate2x2MixingMatrix_fx(
             move16();
         }
     }
+#else
+    /* BINAURAL_CHANNELS == 2: rows [0] and [1] are handled explicitly, only chB is looped */
+    FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+    {
+        IF( Sx_fx[chB] == 0 )
+        {
+            Pre_fx[0][chB] = Mpy_32_32( Pre_fx[0][chB], ONE_DIV_EPSILON_MANT );
+            q_Pre[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pim_fx[0][chB] = Mpy_32_32( Pim_fx[0][chB], ONE_DIV_EPSILON_MANT );
+            q_Pim[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pre_fx[1][chB] = Mpy_32_32( Pre_fx[1][chB], ONE_DIV_EPSILON_MANT );
+            q_Pre[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pim_fx[1][chB] = Mpy_32_32( Pim_fx[1][chB], ONE_DIV_EPSILON_MANT );
+            q_Pim[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+        }
+        ELSE
+        {
+            Word16 Pre_shift, Pim_shift;
+            temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+            temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
+            q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
+
+            Pre_shift = norm_l( Pre_fx[0][chB] );
+            Pim_shift = norm_l( Pim_fx[0][chB] );
+            Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp );
+            Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp );
+            q_Pre[0][chB] = add( q_temp, Pre_shift );
+            q_Pim[0][chB] = add( q_temp, Pim_shift );
+
+            Pre_shift = norm_l( Pre_fx[1][chB] );
+            Pim_shift = norm_l( Pim_fx[1][chB] );
+            Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp );
+            Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], Pim_shift ), temp );
+            q_Pre[1][chB] = add( q_temp, Pre_shift );
+            q_Pim[1][chB] = add( q_temp, Pim_shift );
+        }
+        if ( Pre_fx[0][chB] == 0 )
+        {
+            q_Pre[0][chB] = 31;
+            move16();
+        }
+        if ( Pim_fx[0][chB] == 0 )
+        {
+            q_Pim[0][chB] = 31;
+            move16();
+        }
+        if ( Pre_fx[1][chB] == 0 )
+        {
+            q_Pre[1][chB] = 31;
+            move16();
+        }
+        if ( Pim_fx[1][chB] == 0 )
+        {
+            q_Pim[1][chB] = 31;
+            move16();
+        }
+        move32();
+        move32();
+        move16();
+        move16();
+        move32();
+        move32();
+        move16();
+        move16();
+    }
+#endif
     minimum_s( q_Pre[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
     q_P = s_min( q_P, exp );
     minimum_s( q_Pim[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
-- 
GitLab


From 0d0272942031a842d4bbf1920549b55beb176f59 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Sun, 15 Dec 2024 22:28:39 +0100
Subject: [PATCH 2/3] defines

---
 lib_rend/ivas_dirac_dec_binaural_functions.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 433da6188..644e84398 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -74,7 +74,7 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 #define INV_TAN30_FX               28377      // Q14
 #define EPSILON_MANT               1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */
 #define EPSILON_EXP                ( -39 )
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
 #define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */
 #define ONE_DIV_EPSILON_EXP  ( 40 )
 #endif
@@ -4247,7 +4247,7 @@ static void formulate2x2MixingMatrix_fx(
     // 4611686 = Q62
     IF( maxEne_fx == 0 )
     {
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
         move32();
         q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
@@ -4315,7 +4315,7 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( temp == 0 )
     {
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         IF( E_out1 == 0 )
         {
             Ghat_fx[0] = 0;
@@ -4340,11 +4340,11 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
     }
-#ifndef FIX_1113_OPT_DIRAC_BIN_REND
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
     move32();
@@ -4353,7 +4353,7 @@ static void formulate2x2MixingMatrix_fx(
     temp = L_max( temp, E_in2 );        // q_ein
     IF( temp == 0 )
     {
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         IF( E_out2 == 0 )
         { /* We can set hard-coded results */
             Ghat_fx[1] = 0;
@@ -4377,11 +4377,11 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
     }
-#ifndef FIX_1113_OPT_DIRAC_BIN_REND
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
     move32();
@@ -4566,7 +4566,7 @@ static void formulate2x2MixingMatrix_fx(
         {
             IF( Sx_fx[chB] == 0 )
             {
-#ifdef FIX_1113_OPT_DIRAC_BIN_REND
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
                 Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT );
                 // q_Pre[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
                 q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
-- 
GitLab


From ab28b55264d593f9059e8fa3ae571121daf57294 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Mon, 13 Jan 2025 15:55:12 +0100
Subject: [PATCH 3/3] revert problematic ISqrt32() call, and stick to default
 code

---
 lib_rend/ivas_dirac_dec_binaural_functions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 644e84398..29d337dc6 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -3358,12 +3358,14 @@ static void eig2x2_fx(
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
 
-#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+#if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
 #else
+            /* Note: This code part does not work yet, see pipeline issue for BASOP #1009  */
+            /* although the same code works at other places: mantissa and q_format is fine */
             normVal_fx = ISqrt32( tmp3, &exp_tmp3 );
             q_tmp2 = sub( 31, exp_tmp3 );
 #endif
-- 
GitLab