From 6b052e9535277432f63a9daf3cc2c7bc88a184e2 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 29 Apr 2025 13:33:19 +0530 Subject: [PATCH 1/2] Fix for 3GPP issue 1417: Audible artifact at SBA 13.2 kbps, 48KHz Link #1417 --- lib_enc/sig_clas_fx.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index cf5d31d22..ed34798fc 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -53,9 +53,10 @@ Word16 signal_clas_fx( /* o : classification for current { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; - Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; + Word16 i, clas, pc, zc, exp_ee; Word16 tmp16, tmpS; const Word16 *pt1; + Word64 tmp64; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; move32(); @@ -73,34 +74,34 @@ Word16 signal_clas_fx( /* o : classification for current mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); /* average spectral tilt in dB */ - lo = L_Extract_lc( ee[0], &hi ); - lo2 = L_Extract_lc( ee[1], &hi2 ); - Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ - - test(); - test(); - IF( LT_32( Ltmp, 2048 ) ) + tmp64 = W_mult0_32_32( ee[0], ee[1] ); + exp_ee = W_norm( tmp64 ); + Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32 + exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) ); + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) ) { een = 0; move16(); } - ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) - { - een = 512; - move16(); - } ELSE { /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ - exp_ee = norm_l( Ltmp ); - frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); - exp_ee = sub( 30 - 11, exp_ee ); - Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ - een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ - een = mac_r( C_EE_FX, een, K_EE_FX ); - } + Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25 + Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20 + een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9 + IF( GT_16( een, 512 ) ) + { + een = 512; + move16(); + } + ELSE IF( een < 0 ) + { + een = 0; + move16(); + } + } /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ -- GitLab From 9891182d9469eaa7433f31e765122c1663cdc7ca Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 29 Apr 2025 14:36:41 +0530 Subject: [PATCH 2/2] Fix for EVS Bitexactness issue --- lib_enc/ivas_core_pre_proc_front_fx.c | 2 +- lib_enc/prot_fx_enc.h | 9 + lib_enc/sig_clas_fx.c | 231 ++++++++++++++++++++++++++ 3 files changed, 241 insertions(+), 1 deletion(-) diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 3d7d87bc2..4a8c4edd2 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -1425,7 +1425,7 @@ ivas_error pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ - st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ + st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 4237aa9c3..2aa5df5c4 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -570,6 +570,15 @@ Word16 signal_clas_fx( /* o : classification for current Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); +Word16 signal_clas_ivas_fx( /* o : classification for current frames */ + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation */ + const Word32 *ee, /* i : lf/hf E ration for 2 half-frames */ + const Word16 relE, /* i : frame relative E to the long term average */ + const Word16 L_look, /* i : look-ahead */ + Word16 *uc_clas /* o : temporary classification used in music/speech class*/ +); + void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index ed34798fc..beb97514a 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -50,6 +50,236 @@ Word16 signal_clas_fx( /* o : classification for current const Word16 L_look, /* i : look-ahead */ Word16 *clas_mod /* o : class flag for NOOP detection */ ) +{ + Word32 Ltmp; + Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; + Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; + Word16 tmp16, tmpS; + const Word16 *pt1; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + + /*----------------------------------------------------------------* + * Calculate average voicing + * Calculate average spectral tilt + * Calculate zero-crossing rate + * Calculate pitch stability + *----------------------------------------------------------------*/ + + /* average voicing on second half-frame and look-ahead */ + Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ + mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); + + /* average spectral tilt in dB */ + lo = L_Extract_lc( ee[0], &hi ); + lo2 = L_Extract_lc( ee[1], &hi2 ); + Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ + + test(); + test(); + IF( LT_32( Ltmp, 2048 ) ) + { + een = 0; + move16(); + } + ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) + { + een = 512; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + exp_ee = norm_l( Ltmp ); + frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); + exp_ee = sub( 30 - 11, exp_ee ); + Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ + een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ + een = mac_r( C_EE_FX, een, K_EE_FX ); + } + /* compute zero crossing rate */ + pt1 = speech + sub( L_look, 1 ); + tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ + Ltmp = L_deposit_l( 0 ); + FOR( i = 0; i < L_FRAME; i++ ) + { + tmp16 = add( 1, tmpS ); + pt1++; + tmpS = shr( *pt1, 15 ); /* pt1 >=0 ---> 0 OTHERWISE -1 */ + Ltmp = L_msu0( Ltmp, tmpS, tmp16 ); + } + zc = extract_l( Ltmp ); + + /* compute pitch stability */ + pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); + st->tdm_pc = pc; + move16(); + /*-----------------------------------------------------------------* + * Transform parameters to the range <0:1> + * Compute the merit function + *-----------------------------------------------------------------*/ + + /* corn = K_COR * mean_voi2 + C_COR */ + Ltmp = L_mult( C_COR_FX, 32767 ); + corn = round_fx( L_shl( L_mac( Ltmp, mean_voi2, K_COR_FX ), -4 ) ); /*Q13+Q13*Q15 =>Q13->Q9*/ + /* Limit [0, 1] */ + corn = s_max( corn, 0 ); + corn = s_min( corn, 512 ); + + Ltmp = L_mult( C_ZC_FX, 4 ); /*Q13*Q2 -> Q16*/ + zcn = round_fx( L_shl( L_mac( Ltmp, zc, K_ZC_FX ), 16 - 7 ) ); /*Q0*Q15 + Q16*/ + /* Limit [0, 1] */ + zcn = s_max( zcn, 0 ); + zcn = s_min( zcn, 512 ); + + Ltmp = L_mult( C_RELE_FX, 256 ); /*Q15*Q8 ->Q24*/ + relEn = round_fx( L_shl( L_mac( Ltmp, relE, K_RELE_FX ), 1 ) ); /*relE in Q8 but relEn in Q9*/ + /* Limit [0.5, 1] */ + relEn = s_max( relEn, 256 ); + relEn = s_min( relEn, 512 ); + + Ltmp = L_mult( C_PC_FX, 2 ); /*Q14*Q1 -> Q16*/ + pcn = round_fx( L_shl( L_mac( Ltmp, pc, K_PC_FX ), 16 - 7 ) ); /*Q16 + Q0*Q15*/ + /* Limit [0, 1] */ + pcn = s_max( pcn, 0 ); + pcn = s_min( pcn, 512 ); + + Ltmp = L_mult( een, 10923 ); + Ltmp = L_mac( Ltmp, corn, 21845 ); + Ltmp = L_mac( Ltmp, zcn, 10923 ); + Ltmp = L_mac( Ltmp, relEn, 10923 ); + Ltmp = L_mac( Ltmp, pcn, 10923 ); + + fmerit1 = round_fx_o( L_shl_o( Ltmp, 16 - 10 - 1, &Overflow ), &Overflow ); /* fmerit1 ->Q15 */ + + /*-----------------------------------------------------------------* + * FEC classification + *-----------------------------------------------------------------*/ + + st->fmerit_dt = sub( st->prev_fmerit, fmerit1 ); /*Q15*/ + move16(); + st->prev_fmerit = fmerit1; + move16(); + + /* FEC classification */ + test(); + test(); + IF( st->localVAD == 0 || EQ_16( st->coder_type, UNVOICED ) || LT_16( relE, -1536 ) ) + { + clas = UNVOICED_CLAS; + *clas_mod = clas; + move16(); + move16(); + } + ELSE + { + SWITCH( st->last_clas ) + { + case VOICED_CLAS: + case ONSET: + case VOICED_TRANSITION: + + IF( LT_16( fmerit1, 16056 ) ) /*0.49f*/ + { + clas = UNVOICED_CLAS; + move16(); + } + ELSE IF( LT_16( fmerit1, 21626 ) ) /*0.66*/ + { + clas = VOICED_TRANSITION; + move16(); + } + ELSE + { + clas = VOICED_CLAS; + move16(); + } + IF( LT_16( fmerit1, 14745 /* 0.45f*/ ) ) + { + *clas_mod = UNVOICED_CLAS; + move16(); + } + ELSE IF( LT_16( fmerit1, 21626 /* 0.66f*/ ) ) + { + *clas_mod = VOICED_TRANSITION; + move16(); + } + ELSE + { + *clas_mod = VOICED_CLAS; + move16(); + } + BREAK; + + case UNVOICED_CLAS: + case UNVOICED_TRANSITION: + IF( GT_16( fmerit1, 20643 ) ) /*0.63*/ + { + clas = ONSET; + move16(); + } + ELSE IF( GT_16( fmerit1, 19169 ) ) /*0.585*/ + { + clas = UNVOICED_TRANSITION; + move16(); + } + ELSE + { + clas = UNVOICED_CLAS; + move16(); + } + *clas_mod = clas; + move16(); + + BREAK; + + default: + clas = UNVOICED_CLAS; + *clas_mod = clas; + move16(); + move16(); + BREAK; + } + } + /* Onset classification */ + + /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ + /* tc_cnt == 0: UC frame */ + /* tc_cnt == 1: onset/transition frame, coded by GC coder type */ + /* tc_cnt == 2: frame after onset/transition frame, coded by TC coder type */ + + if ( clas == 0 ) + { + st->tc_cnt = 0; + move16(); + } + + test(); + IF( GE_16( clas, VOICED_TRANSITION ) && st->tc_cnt >= 0 ) + { + st->tc_cnt = add( st->tc_cnt, 1 ); + move16(); + } + + if ( GT_16( st->tc_cnt, 2 ) ) + { + st->tc_cnt = -1; + move16(); + } + return clas; +} + +Word16 signal_clas_ivas_fx( /* o : classification for current frames */ + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ + const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ + const Word16 relE, /* i : frame relative E to the long term average in Q8 */ + const Word16 L_look, /* i : look-ahead */ + Word16 *clas_mod /* o : class flag for NOOP detection */ +) { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; @@ -273,6 +503,7 @@ Word16 signal_clas_fx( /* o : classification for current } return clas; } + /*-------------------------------------------------------------------* * select_TC_fx() * -- GitLab