From 6b052e9535277432f63a9daf3cc2c7bc88a184e2 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Tue, 29 Apr 2025 13:33:19 +0530
Subject: [PATCH 1/2] Fix for 3GPP issue 1417: Audible artifact at SBA 13.2
 kbps, 48 kHz

Link #1417
---
 lib_enc/sig_clas_fx.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c
index cf5d31d22..ed34798fc 100644
--- a/lib_enc/sig_clas_fx.c
+++ b/lib_enc/sig_clas_fx.c
@@ -53,9 +53,10 @@ Word16 signal_clas_fx(                       /* o  : classification for current
 {
     Word32 Ltmp;
     Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1;
-    Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee;
+    Word16 i, clas, pc, zc, exp_ee;
     Word16 tmp16, tmpS;
     const Word16 *pt1;
+    Word64 tmp64;
 #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     Flag Overflow = 0;
     move32();
@@ -73,34 +74,34 @@ Word16 signal_clas_fx(                       /* o  : classification for current
     mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 );
 
     /* average spectral tilt in dB */
-    lo = L_Extract_lc( ee[0], &hi );
-    lo2 = L_Extract_lc( ee[1], &hi2 );
-    Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */
-
-    test();
-    test();
-    IF( LT_32( Ltmp, 2048 ) )
+    tmp64 = W_mult0_32_32( ee[0], ee[1] );
+    exp_ee = W_norm( tmp64 );
+    Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32
+    exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) );
+    IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) )
     {
         een = 0;
         move16();
     }
-    ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 )
-    {
-        een = 512;
-        move16();
-    }
     ELSE
     {
         /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */
         /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */
-        exp_ee = norm_l( Ltmp );
-        frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) );
-        exp_ee = sub( 30 - 11, exp_ee );
-        Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */
-        een = round_fx( L_shl( Ltmp, 16 - 5 ) );   /* Q14 -> Q9 */
-        een = mac_r( C_EE_FX, een, K_EE_FX );
-    }
+        Ltmp = BASOP_Util_Log10( Ltmp, exp_ee );                            // Q25
+        Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ );                // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20
+        een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9
 
+        IF( GT_16( een, 512 ) )
+        {
+            een = 512;
+            move16();
+        }
+        ELSE IF( een < 0 )
+        {
+            een = 0;
+            move16();
+        }
+    }
     /* compute zero crossing rate */
     pt1 = speech + sub( L_look, 1 );
     tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */
-- 
GitLab


From 9891182d9469eaa7433f31e765122c1663cdc7ca Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Tue, 29 Apr 2025 14:36:41 +0530
Subject: [PATCH 2/2] Fix for EVS Bitexactness issue

---
 lib_enc/ivas_core_pre_proc_front_fx.c |   2 +-
 lib_enc/prot_fx_enc.h                 |   9 +
 lib_enc/sig_clas_fx.c                 | 231 ++++++++++++++++++++++++++
 3 files changed, 241 insertions(+), 1 deletion(-)

diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c
index 3d7d87bc2..4a8c4edd2 100644
--- a/lib_enc/ivas_core_pre_proc_front_fx.c
+++ b/lib_enc/ivas_core_pre_proc_front_fx.c
@@ -1425,7 +1425,7 @@ ivas_error pre_proc_front_ivas_fx(
      * TC frame selection
      *-----------------------------------------------------------------*/
 
-    st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */
+    st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */
     move16();
 
     select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD );
diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h
index 4237aa9c3..2aa5df5c4 100644
--- a/lib_enc/prot_fx_enc.h
+++ b/lib_enc/prot_fx_enc.h
@@ -570,6 +570,15 @@ Word16 signal_clas_fx(                       /* o  : classification for current
                        Word16 *uc_clas       /* o  : temporary classification used in music/speech class*/
 );
 
+Word16 signal_clas_ivas_fx(                       /* o  : classification for current frames              */
+                            Encoder_State *st,    /* i/o: encoder state structure                           */
+                            const Word16 *speech, /* i  : pointer to speech signal for E computation in Qx  */
+                            const Word32 *ee,     /* i  : lf/hf E ratio for 2 half-frames in Q6             */
+                            const Word16 relE,    /* i  : frame relative E to the long term average in Q8   */
+                            const Word16 L_look,  /* i  : look-ahead                                        */
+                            Word16 *uc_clas       /* o  : class flag for NOOP detection (clas_mod in .c)    */
+);
+
 void speech_music_classif_fx(
     Encoder_State *st,            /* i/o: state structure                                 */
     const Word16 *new_inp,        /* i  : new input signal                                */
diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c
index ed34798fc..beb97514a 100644
--- a/lib_enc/sig_clas_fx.c
+++ b/lib_enc/sig_clas_fx.c
@@ -50,6 +50,236 @@ Word16 signal_clas_fx(                       /* o  : classification for current
                        const Word16 L_look,  /* i  : look-ahead                                        */
                        Word16 *clas_mod      /* o  : class flag for NOOP detection                     */
 )
+{
+    Word32 Ltmp;
+    Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1;
+    Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee;
+    Word16 tmp16, tmpS;
+    const Word16 *pt1;
+#ifdef BASOP_NOGLOB_DECLARE_LOCAL
+    Flag Overflow = 0;
+    move32();
+#endif
+
+    /*----------------------------------------------------------------*
+     * Calculate average voicing
+     * Calculate average spectral tilt
+     * Calculate zero-crossing rate
+     * Calculate pitch stability
+     *----------------------------------------------------------------*/
+
+    /* average voicing on second half-frame and look-ahead */
+    Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */
+    mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 );
+
+    /* average spectral tilt in dB */
+    lo = L_Extract_lc( ee[0], &hi );
+    lo2 = L_Extract_lc( ee[1], &hi2 );
+    Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */
+
+    test();
+    test();
+    IF( LT_32( Ltmp, 2048 ) )
+    {
+        een = 0;
+        move16();
+    }
+    ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 )
+    {
+        een = 512;
+        move16();
+    }
+    ELSE
+    {
+        /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */
+        /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */
+        exp_ee = norm_l( Ltmp );
+        frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) );
+        exp_ee = sub( 30 - 11, exp_ee );
+        Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */
+        een = round_fx( L_shl( Ltmp, 16 - 5 ) );   /* Q14 -> Q9 */
+        een = mac_r( C_EE_FX, een, K_EE_FX );
+    }
+    /* compute zero crossing rate */
+    pt1 = speech + sub( L_look, 1 );
+    tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */
+    Ltmp = L_deposit_l( 0 );
+    FOR( i = 0; i < L_FRAME; i++ )
+    {
+        tmp16 = add( 1, tmpS );
+        pt1++;
+        tmpS = shr( *pt1, 15 ); /* pt1 >=0 ---> 0 OTHERWISE -1   */
+        Ltmp = L_msu0( Ltmp, tmpS, tmp16 );
+    }
+    zc = extract_l( Ltmp );
+
+    /* compute pitch stability */
+    pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) );
+    st->tdm_pc = pc;
+    move16();
+    /*-----------------------------------------------------------------*
+     * Transform parameters to the range <0:1>
+     * Compute the merit function
+     *-----------------------------------------------------------------*/
+
+    /* corn = K_COR * mean_voi2 + C_COR */
+    Ltmp = L_mult( C_COR_FX, 32767 );
+    corn = round_fx( L_shl( L_mac( Ltmp, mean_voi2, K_COR_FX ), -4 ) ); /*Q13+Q13*Q15 =>Q13->Q9*/
+    /* Limit [0, 1] */
+    corn = s_max( corn, 0 );
+    corn = s_min( corn, 512 );
+
+    Ltmp = L_mult( C_ZC_FX, 4 );                                   /*Q13*Q2 -> Q16*/
+    zcn = round_fx( L_shl( L_mac( Ltmp, zc, K_ZC_FX ), 16 - 7 ) ); /*Q0*Q15 + Q16*/
+    /* Limit [0, 1] */
+    zcn = s_max( zcn, 0 );
+    zcn = s_min( zcn, 512 );
+
+    Ltmp = L_mult( C_RELE_FX, 256 );                                /*Q15*Q8 ->Q24*/
+    relEn = round_fx( L_shl( L_mac( Ltmp, relE, K_RELE_FX ), 1 ) ); /*relE in Q8 but relEn in Q9*/
+    /* Limit [0.5, 1] */
+    relEn = s_max( relEn, 256 );
+    relEn = s_min( relEn, 512 );
+
+    Ltmp = L_mult( C_PC_FX, 2 );                                   /*Q14*Q1 -> Q16*/
+    pcn = round_fx( L_shl( L_mac( Ltmp, pc, K_PC_FX ), 16 - 7 ) ); /*Q16 + Q0*Q15*/
+    /* Limit [0, 1] */
+    pcn = s_max( pcn, 0 );
+    pcn = s_min( pcn, 512 );
+
+    Ltmp = L_mult( een, 10923 );
+    Ltmp = L_mac( Ltmp, corn, 21845 );
+    Ltmp = L_mac( Ltmp, zcn, 10923 );
+    Ltmp = L_mac( Ltmp, relEn, 10923 );
+    Ltmp = L_mac( Ltmp, pcn, 10923 );
+
+    fmerit1 = round_fx_o( L_shl_o( Ltmp, 16 - 10 - 1, &Overflow ), &Overflow ); /* fmerit1 ->Q15 */
+
+    /*-----------------------------------------------------------------*
+     * FEC classification
+     *-----------------------------------------------------------------*/
+
+    st->fmerit_dt = sub( st->prev_fmerit, fmerit1 ); /*Q15*/
+    move16();
+    st->prev_fmerit = fmerit1;
+    move16();
+
+    /* FEC classification */
+    test();
+    test();
+    IF( st->localVAD == 0 || EQ_16( st->coder_type, UNVOICED ) || LT_16( relE, -1536 ) )
+    {
+        clas = UNVOICED_CLAS;
+        *clas_mod = clas;
+        move16();
+        move16();
+    }
+    ELSE
+    {
+        SWITCH( st->last_clas )
+        {
+            case VOICED_CLAS:
+            case ONSET:
+            case VOICED_TRANSITION:
+
+                IF( LT_16( fmerit1, 16056 ) ) /*0.49f*/
+                {
+                    clas = UNVOICED_CLAS;
+                    move16();
+                }
+                ELSE IF( LT_16( fmerit1, 21626 ) ) /*0.66*/
+                {
+                    clas = VOICED_TRANSITION;
+                    move16();
+                }
+                ELSE
+                {
+                    clas = VOICED_CLAS;
+                    move16();
+                }
+                IF( LT_16( fmerit1, 14745 /* 0.45f*/ ) )
+                {
+                    *clas_mod = UNVOICED_CLAS;
+                    move16();
+                }
+                ELSE IF( LT_16( fmerit1, 21626 /* 0.66f*/ ) )
+                {
+                    *clas_mod = VOICED_TRANSITION;
+                    move16();
+                }
+                ELSE
+                {
+                    *clas_mod = VOICED_CLAS;
+                    move16();
+                }
+                BREAK;
+
+            case UNVOICED_CLAS:
+            case UNVOICED_TRANSITION:
+                IF( GT_16( fmerit1, 20643 ) ) /*0.63*/
+                {
+                    clas = ONSET;
+                    move16();
+                }
+                ELSE IF( GT_16( fmerit1, 19169 ) ) /*0.585*/
+                {
+                    clas = UNVOICED_TRANSITION;
+                    move16();
+                }
+                ELSE
+                {
+                    clas = UNVOICED_CLAS;
+                    move16();
+                }
+                *clas_mod = clas;
+                move16();
+
+                BREAK;
+
+            default:
+                clas = UNVOICED_CLAS;
+                *clas_mod = clas;
+                move16();
+                move16();
+                BREAK;
+        }
+    }
+    /* Onset classification */
+
+    /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */
+    /* tc_cnt ==  0: UC frame */
+    /* tc_cnt ==  1: onset/transition frame, coded by GC coder type */
+    /* tc_cnt ==  2: frame after onset/transition frame, coded by TC coder type */
+
+    if ( clas == 0 )
+    {
+        st->tc_cnt = 0;
+        move16();
+    }
+
+    test();
+    IF( GE_16( clas, VOICED_TRANSITION ) && st->tc_cnt >= 0 )
+    {
+        st->tc_cnt = add( st->tc_cnt, 1 );
+        move16();
+    }
+
+    if ( GT_16( st->tc_cnt, 2 ) )
+    {
+        st->tc_cnt = -1;
+        move16();
+    }
+    return clas;
+}
+
+Word16 signal_clas_ivas_fx(                       /* o  : classification for current frames              */
+                            Encoder_State *st,    /* i/o: encoder state structure                           */
+                            const Word16 *speech, /* i  : pointer to speech signal for E computation in Qx */
+                            const Word32 *ee,     /* i  : lf/hf E ratio for 2 half-frames in Q6             */
+                            const Word16 relE,    /* i  : frame relative E to the long term average in Q8   */
+                            const Word16 L_look,  /* i  : look-ahead                                        */
+                            Word16 *clas_mod      /* o  : class flag for NOOP detection                     */
+)
 {
     Word32 Ltmp;
     Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1;
@@ -273,6 +503,7 @@ Word16 signal_clas_fx(                       /* o  : classification for current
     }
     return clas;
 }
+
 /*-------------------------------------------------------------------*
  * select_TC_fx()
  *
-- 
GitLab