From 78d95bc67e662d0a263ba5324a510a9dd29b34d1 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 26 Nov 2024 14:44:09 +0530 Subject: [PATCH 1/3] Encoder float code clean up and LTV crash fixes --- lib_com/ivas_spar_com.c | 46 ++++-- lib_enc/acelp_core_enc.c | 4 + lib_enc/analy_sp_fx.c | 226 ++++++++++++++++++++++++------ lib_enc/ivas_front_vad.c | 27 +++- lib_enc/ivas_masa_enc.c | 2 +- lib_enc/ivas_stat_enc.h | 1 + lib_enc/ivas_stereo_td_analysis.c | 8 +- lib_enc/multi_harm_fx.c | 2 +- lib_enc/pitch_ol2.c | 11 +- lib_enc/speech_music_classif_fx.c | 2 +- 10 files changed, 264 insertions(+), 65 deletions(-) diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index 11ee4b3d6..ec11596b4 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -1201,16 +1201,22 @@ static void ivas_get_pred_coeffs_enc_fx( move32(); dm_v_re_q[i][b] = sub( add( add( dm_v_re_q[i][b], DM_F_q[b] ), tmp_shift ), 32 ); move16(); - if ( ppDM_Fv_re[i][b] == 0 ) + // if ( ppDM_Fv_re[i][b] == 0 ) + //{ + // dm_v_re_q[i][b] = Q31; + // move16(); + // } + + IF( ppPred_coeffs_re[i][b] ) { - dm_v_re_q[i][b] = Q31; + *q_pred_coeffs = s_min( *q_pred_coeffs, q_ppPred_coeffs_re[i][b] ); + move16(); + } + IF( ppDM_Fv_re[i][b] ) + { + *q_dm_fv_re = s_min( *q_dm_fv_re, dm_v_re_q[i][b] ); move16(); } - - *q_pred_coeffs = s_min( *q_pred_coeffs, q_ppPred_coeffs_re[i][b] ); - move16(); - *q_dm_fv_re = s_min( *q_dm_fv_re, dm_v_re_q[i][b] ); - move16(); } } FOR( i = 0; i < pred_dim; i++ ) @@ -1772,7 +1778,7 @@ static void ivas_get_Wscaling_factor_enc_fx( test(); IF( EQ_16( active_w, 1 ) && ( dyn_active_w_flag == 0 ) ) { - Word16 guard_bits, q_Gw_sq, q_g_sq, q_min, tmp_exp; + Word16 shift, guard_bits, q_Gw_sq, q_g_sq, q_min, tmp_exp; Word32 Gw_sq, g_sq, tmp; g_sq = 0; @@ -1786,14 +1792,28 @@ static void ivas_get_Wscaling_factor_enc_fx( Gw_sq = BASOP_Util_Divide3232_Scale( cov_real[0][0][b], L_max( postpred_cov_re[0][0], IVAS_FIX_EPS ), &tmp_exp ); // 15-(tmp_exp-(q_cov_real[0][0][b]- q_postpred_cov_re)) q_Gw_sq = add( sub( 15, tmp_exp ), sub( q_cov_real[0][0][b], q_postpred_cov_re ) ); - guard_bits = find_guarded_bits_fx( num_ch ); + shift = MAX16B; + move16(); - FOR( ch = 0; ch < sub( num_ch, 1 ); ch++ ) + FOR( ch = 0; ch < ( num_ch - 1 ); ch++ ) { - abs_val = L_shr( Mpy_32_32( pred_coeffs_re[ch][b], pred_coeffs_re[ch][b] ), guard_bits ); // q=2*q_pred_coeffs_re-guard_bits-31 - g_sq = L_add( g_sq, abs_val ); // q=2*q_pred_coeffs_re-guard_bits-31 + IF( pred_coeffs_re[ch][b] != 0 ) + { + shift = s_min( shift, norm_l( pred_coeffs_re[ch][b] ) ); + } } - q_g_sq = sub( add( q_pred_coeffs_re, q_pred_coeffs_re ), add( 31, guard_bits ) ); + guard_bits = find_guarded_bits_fx( num_ch ); + if ( EQ_16( shift, MAX16B ) ) + { + shift = 0; + move16(); + } + FOR( ch = 0; ch < ( num_ch - 1 ); ch++ ) + { + abs_val = L_shr( Mpy_32_32( L_shl( pred_coeffs_re[ch][b], shift ), L_shl( pred_coeffs_re[ch][b], shift ) ), guard_bits ); // q=2*q_pred_coeffs_re-guard_bits-31 + g_sq = L_add( g_sq, abs_val ); // q=2*q_pred_coeffs_re-guard_bits-31 + } + q_g_sq = sub( shl( add( q_pred_coeffs_re, shift ), 1 ), add( 31, guard_bits ) ); tmp = Mpy_32_32( ONE_IN_Q30 /*4 in Q28*/, Mpy_32_32( dm_f_local, g_sq ) ); // q_g_sq+28-31 q_tmp = sub( q_g_sq, 3 ); diff --git a/lib_enc/acelp_core_enc.c b/lib_enc/acelp_core_enc.c index d77d4b42b..e2fb9b4b7 100644 --- a/lib_enc/acelp_core_enc.c +++ b/lib_enc/acelp_core_enc.c @@ -515,6 +515,10 @@ ivas_error acelp_core_enc( * Configure ACELP bit allocation *-----------------------------------------------------------------*/ + Word16 temp = getScaleFactor16( st->hGSCEnc->last_exc_dct_in_fx, L_FRAME16k ); + Scale_sig( st->hGSCEnc->last_exc_dct_in_fx, L_FRAME16k, temp ); + st->hGSCEnc->Q_last_exc_dct_in = add( st->hGSCEnc->Q_last_exc_dct_in, temp ); + move16(); nb_bits = 0; move16(); st->acelp_cfg.FEC_mode = 0; diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index ef19e31b1..01a23f0b2 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -6,7 +6,7 @@ #include "options.h" #include "cnst.h" #include "basop_util.h" -//#include "prot_fx.h" +// #include "prot_fx.h" #include "prot_fx.h" /* Function prototypes */ #include "prot_fx_enc.h" /* Function prototypes */ #include "rom_enc.h" @@ -20,6 +20,7 @@ static void find_enr( Word16 data[], Word32 band[], Word32 *ptE, Word32 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new2, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); #ifdef IVAS_FLOAT_FIXED static void ivas_find_enr( Word16 data[], Word32 band[], Word32 *ptE, Word32 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new2, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); +static void ivas_find_enr1( Word16 *data, Word16 q_data, Word32 *band, Word32 *ptE, Word32 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); #endif #ifdef IVAS_CODE_CPE static void find_enr_dft( CPE_ENC_HANDLE hCPE, const int32_t input_Fs, float DFT_past_DMX[], float band[], float *ptE, float *Etot, const int16_t min_band, const int16_t max_band, float *Bin_E, float *band_ener ); @@ -593,9 +594,6 @@ void ivas_analy_sp_fx_front( Word32 *pt_bands; Word32 Ltmp, LEtot; Word16 *pt_fft; - Word16 Min_val, Max_val; - Word16 Scale_fac2; - Word16 fft_temp[L_FFT]; /*-----------------------------------------------------------------* * Compute spectrum @@ -609,83 +607,78 @@ void ivas_analy_sp_fx_front( { FOR( i_subfr = 0; i_subfr <= 1; i_subfr++ ) { + /* set pointer to the beginning of the signal for spectral analysis */ + /* set the pointer for first analysis window */ pt = speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2; IF( i_subfr != 0 ) { + /* set the pointer for second analysis window */ pt = speech + 7 * ( L_SUBFR / 2 ) - L_FFT / 2; } /* Clear 1st value of 1st part, copy 1st value of 2nd part */ - fft_temp[0] = 0; + pt_fft[0] = 0; move16(); - fft_temp[L_FFT / 2] = pt[L_FFT / 2]; + pt_fft[L_FFT / 2] = pt[L_FFT / 2]; move16(); - Max_val = s_max( fft_temp[0], fft_temp[L_FFT / 2] ); - Min_val = s_min( fft_temp[0], fft_temp[L_FFT / 2] ); FOR( i = 1; i < L_FFT / 2; i++ ) { /* 1st windowed part */ - fft_temp[i] = mult_r( pt[i], sqrt_han_window_fx[i] ); + pt_fft[i] = mult_r( pt[i], sqrt_han_window_fx[i] ); move16(); - if ( fft_temp[i] > 0 ) - Max_val = s_max( Max_val, fft_temp[i] ); - if ( fft_temp[i] < 0 ) - Min_val = s_min( Min_val, fft_temp[i] ); /* 2nd windowed part */ - fft_temp[L_FFT - i] = mult_r( pt[L_FFT - i], sqrt_han_window_fx[i] ); + pt_fft[L_FFT - i] = mult_r( pt[L_FFT - i], sqrt_han_window_fx[i] ); move16(); - if ( fft_temp[L_FFT - i] > 0 ) - Max_val = s_max( Max_val, fft_temp[L_FFT - i] ); - if ( fft_temp[L_FFT - i] < 0 ) - Min_val = s_min( Min_val, fft_temp[L_FFT - i] ); } - /* Combine -Min_val and Max_val into one */ - Max_val = s_max( negate( Min_val ), Max_val ); - - Scale_fac[i_subfr] = s_min( sub( norm_s( Max_val ), 1 ), 6 ); + Scale_fac[i_subfr] = 0; move16(); - Scale_fac2 = shl( Scale_fac[i_subfr], 1 ); - Scale_sig( fft_temp, L_FRAME_12k8, Scale_fac[i_subfr] ); - r_fft_fx_lc( FFT_W128, SIZE_256, SIZE2_256, NUM_STAGE_256, fft_temp, pt_fft, 1 ); - Scale_sig( pt_fft, L_FFT, -1 ); // Q(-1) - /*e_min_scaled = Q_new + QSCALE + 2*/ - ivas_find_enr( pt_fft, pt_bands, lf_E + i_subfr * VOIC_BINS, &LEtot, min_band, max_band, - add( Q_new, Scale_fac2 ), e_min_scaled, &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); + /* compute the spectrum */ + fft_rel_fx( pt_fft, L_FFT, LOG2_L_FFT ); + + /* find energy per critical band */ + ivas_find_enr1( pt_fft, add( Q_new, Scale_fac[i_subfr] ), pt_bands, lf_E + i_subfr * VOIC_BINS, &LEtot, min_band, max_band, + Q_new, e_min_scaled, &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); + pt_bands += NB_BANDS; pt_fft += L_FFT; } - LEtot = L_shl_sat( LEtot, 2 ); // Q_new + Q_SCALE - 2 + LEtot = L_shr( LEtot, 2 ); // Q_new + Q_SCALE - 2 } ELSE { - Word16 Q_inp_dmx = Q_factor_arrL( hCPE->hStereoDft->DFT[0], STEREO_DFT_N_MAX_ENC ); - floatToFixed_arrL( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], Q_inp_dmx, STEREO_DFT_N_MAX_ENC ); - Word16 Qout = add( Q_new, QSCALE - 2 ); - find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, Q_inp_dmx, Qout ); + find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), add( Q_new, QSCALE - 2 ) ); MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS ); MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 ); MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS ); MVR2R_WORD32( pt_bands, pt_bands + NB_BANDS, NB_BANDS ); LEtot = L_shl( LEtot, 1 ); } + + /* Average total log energy over both half-frames */ Word32 temp32_log; - temp32_log = L_add( BASOP_Util_Log2( L_shr( LEtot, 1 ) ), L_shl( sub( Q31, add( Q_new, QSCALE - Q2 - 1 ) ), Q25 ) ); - temp32_log = Mpy_32_32( temp32_log, 1616142483 ); // log10(x) = log2(x)/log2(10) - /* 10.0 * log10( (float) tmp )*/ - /* 10.0/log2(10) in Q29 = 1616142483*/ - *Etot = extract_l( L_shr( temp32_log, 23 - 8 ) ); // Q8 + IF( LEtot == 0 ) + { + *Etot = -12800 /* 10.f * logf(0.00001f) in Q8 */; + move16(); + } + ELSE + { + temp32_log = BASOP_Util_Log10( L_shr( LEtot, 1 ), sub( Q31, add( Q_new, QSCALE - Q2 ) ) ); // Q25 + temp32_log = Mpy_32_32( temp32_log, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21 + *Etot = extract_l( L_shr( temp32_log, Q21 - Q8 ) ); // Q8 + move16(); + } + /* Per-bin log-energy spectrum */ Bin_E[L_FFT / 2 - 1] = Bin_E[L_FFT / 2 - 2]; move32(); Bin_E[L_FFT - 1] = Bin_E[L_FFT - 2]; move32(); - /* Per-bin log-energy spectrum */ - FOR( i = 0; i < L_FFT / 2; i++ ) { Bin_E_old[i] = Bin_E[i]; @@ -980,6 +973,157 @@ static void ivas_find_enr( return; } +#ifdef IVAS_FLOAT_FIXED +/* Merge with ivas_find_enr function once analy_sp is unified */ +static void ivas_find_enr1( + Word16 data[], /* i : fft result */ + Word16 q_data, /* i : Q of fft result */ + Word32 band[], /* o : per band energy Q_new + QSCALE */ + Word32 *ptE, /* o : per bin energy for low frequencies Q_new + QSCALE-2 */ + Word32 *LEtot, /* o : total energy Q_new + QSCALE */ + const Word16 min_band, /* i : minimum critical band Q0 */ + const Word16 max_band, /* i : maximum critical band Q0 */ + const Word16 Q_new, /* i : scaling factor Q0 */ + const Word32 e_min, /* i : minimum energy scaled Q_new + QSCALE */ + Word32 *Bin_E, /* o : Per bin energy Q_new + QSCALE-2 */ + Word16 BIN_FREQ_FX, /* i : Number of frequency bins */ + Word32 *band_energies /* o : per band energy without MODE2_E_MIN */ +) +{ + Word16 i, cnt; + Word16 freq; + Word16 *ptR, *ptI; + Word32 Ltmp; + Word16 voic_band; + Word32 etot; + Word32 norm_val; + + norm_val = 131072 /* 4.0f / ( L_FFT * L_FFT ) in Q31 */; + + ptR = &data[1]; /* first real */ + ptI = &data[L_FFT - 1]; /* first imaginary */ + + voic_band = VOIC_BAND_8k; + move16(); + assert( VOIC_BAND == VOIC_BAND_8k ); + + /*-----------------------------------------------------------------* + * For low frequency bins, save per bin energy for the use + * in NS and find_tilt() + *-----------------------------------------------------------------*/ + + freq = BIN_FREQ_FX; + move16(); + FOR( i = 0; i < voic_band; i++ ) /* up to maximum allowed voiced critical band */ + { + band[i] = 0; + move32(); + cnt = 0; + move16(); + WHILE( freq <= crit_bands[i] ) + { + // *ptE = *ptR * *ptR + *ptI * *ptI; /* energy */ + Word64 te = ( W_add( W_mult0_32_32( *ptR, *ptR ), W_mult0_32_32( *ptI, *ptI ) ) ); // 2 * Qfft + Word16 te_exp = W_norm( te ); + te = W_shl( te, te_exp ); // 2 * Qfft + te_exp + Ltmp = W_extract_h( te ); // 2 * Qfft + te_exp - 32 + + // *ptE *= norm_val; /* normalization - corresponds to FFT normalization by 2/L_FFT */ + Ltmp = Mpy_32_32( Ltmp, norm_val ); // 2 * Qfft + te_exp - 32 + *ptE = L_shl( Ltmp, sub( add( Q_new, QSCALE - 2 ), add( shl( q_data, Q1 ), sub( te_exp, 32 ) ) ) ); // Q_new + QSCALE - 2 + move32(); + + *Bin_E++ = *ptE; // Q_new + QSCALE - 2 + move32(); + band[i] = L_add( band[i], *ptE++ ); // Q_new + QSCALE - 2 + move32(); + ptR++; + ptI--; + + freq = add( freq, BIN_FREQ_FX ); + cnt = add( cnt, 1 ); + } + + Ltmp = Mpy_32_16_1( band[i], inv_tbl_fx[cnt] ); /* normalization per frequency bin */ // Q_new + QSCALE - 2 + band[i] = L_shl( Ltmp, Q2 ); // Q_new + QSCALE + move32(); + + band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN */ // Q_new + QSCALE + 2 + move32(); + + if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE + { + band[i] = e_min; // Q_new + QSCALE + move32(); + } + } + + IF( EQ_16( BIN_FREQ_FX, 50 ) ) + { + /*-----------------------------------------------------------------* + * Continue compute the E per critical band for high frequencies + *-----------------------------------------------------------------*/ + + FOR( i = voic_band; i < NB_BANDS; i++ ) + { + band[i] = 0; + move32(); + cnt = 0; + move16(); + WHILE( freq <= crit_bands[i] ) + { + // *Bin_E = *ptR * *ptR + *ptI * *ptI; + Word64 te = ( W_add( W_mult0_32_32( *ptR, *ptR ), W_mult0_32_32( *ptI, *ptI ) ) ); // 2 * Qfft + Word16 te_exp = W_norm( te ); + te = W_shl( te, te_exp ); // 2 * Qfft + te_exp + Ltmp = W_extract_h( te ); // 2 * Qfft + te_exp - 32 + + // *Bin_E *= norm_val; + Ltmp = Mpy_32_32( Ltmp, norm_val ); // 2 * Qfft + te_exp - 32 + *Bin_E = L_shl( Ltmp, sub( add( Q_new, QSCALE - 2 ), add( shl( q_data, Q1 ), sub( te_exp, 32 ) ) ) ); // Q_new + QSCALE - 2 + move32(); + + band[i] = L_add( band[i], *Bin_E++ ); // Q_new + QSCALE - 2 + move32(); + ptR++; + ptI--; + + freq = add( freq, BIN_FREQ_FX ); + cnt = add( cnt, 1 ); + } + + Ltmp = Mpy_32_16_1( band[i], inv_tbl_fx[cnt] ); /* normalization per frequency bin */ // Q_new + QSCALE - 2 + band[i] = L_shl( Ltmp, Q2 ); // Q_new + QSCALE + move32(); + + band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN */ // Q_new + QSCALE + 2 + move32(); + + if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE + { + band[i] = e_min; // Q_new + QSCALE + move32(); + } + } + } + + /*-----------------------------------------------------------------* + * Find the total energy over the input bandwidth + *-----------------------------------------------------------------*/ + + etot = *LEtot; + move32(); + FOR( i = min_band; i <= max_band; i++ ) + { + etot = L_add( etot, band[i] ); // Q_new + QSCALE + } + *LEtot = etot; + move32(); + + return; +} +#endif + static void find_enr( Word16 data[], /* i : fft result */ Word32 band[], /* o : per band energy Q_new + QSCALE */ diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index e42195b67..852948fd0 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -397,7 +397,10 @@ ivas_error front_vad_fx( modify_Fs_fx( sts[n]->input_fx, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim_fx, ( sts[0]->max_bwidth == NB ), &Qband, &mem_decim_size ); /* Preemphasis */ - hFrontVad->mem_preemph_fx = shl( hFrontVad->mem_preemph_fx, -1 - Qband ); + hFrontVad->mem_preemph_fx = shl( hFrontVad->mem_preemph_fx, sub( add( Q_inp, Qband ), hFrontVad->q_mem_preemph_fx ) ); + move16(); + hFrontVad->q_mem_preemph_fx = add( Q_inp, Qband ); + move16(); PREEMPH_FX( hFrontVad->buffer_12k8_fx + L_FFT / 2, PREEMPH_FAC, L_FRAME, &hFrontVad->mem_preemph_fx ); @@ -542,6 +545,7 @@ ivas_error front_vad_create( set16_fx( hFrontVad->mem_decim_fx, 0, shl( L_FILT_MAX, 1 ) ); set16_fx( hFrontVad->buffer_12k8_fx, 0, i_mult( 3, shr( L_FRAME, 1 ) ) ); hFrontVad->mem_preemph_fx = 0; + hFrontVad->q_mem_preemph_fx = 0; hFrontVad->q_buffer_12k8 = Q31; hFrontVad->q_mem_decim = Q31; #else @@ -785,7 +789,7 @@ ivas_error front_vad_spar_fx( * Initialization *-----------------------------------------------------------------*/ // inp_12k8 = hFrontVad->buffer_12k8; - Word16 Q_bands = Q31; + Word16 Q_bands = Q31, tmp1; Word16 Q_inp_12k8 = hFrontVad->q_buffer_12k8; move16(); move16(); @@ -830,6 +834,14 @@ ivas_error front_vad_spar_fx( s = 15; move16(); } + + maximum_abs_16_fx( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, &tmp1 ); + IF( tmp1 != 0 ) + { + tmp1 = norm_s( tmp1 ); + tmp1 = add( tmp1, hFrontVad->q_mem_decim ); + s = s_min( s, tmp1 ); + } IF( tmp != 0 ) { Scale_sig( st->input_fx, input_frame, s ); @@ -866,14 +878,20 @@ ivas_error front_vad_spar_fx( #endif #endif Word16 Q_buffer = hFrontVad->q_buffer_12k8; + move16(); Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( Q_inp, hFrontVad->q_mem_decim ) ); hFrontVad->q_mem_decim = Q_inp; + move16(); IF( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx, Etot_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_fx, &PS_fx[0], &st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ) != IVAS_ERR_OK ) { return error; } Scale_sig( hFrontVad->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384, sub( Q_buffer, hFrontVad->q_buffer_12k8 ) ); hFrontVad->q_buffer_12k8 = Q_buffer; + move16(); + Q_inp_12k8 = hFrontVad->q_buffer_12k8; + move16(); + IF( st->lgBin_E_fx != NULL ) { Copy_Scale_sig_16_32( st->lgBin_E_fx, st->Bin_E_fx, L_FFT / 2, sub( st->q_Bin_E, Q7 ) ); @@ -922,7 +940,10 @@ ivas_error front_vad_spar_fx( Scale_sig( inp_12k8_fx - 2 * L_FILT_MAX, 2 * L_FILT_MAX, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), hFrontVad->q_mem_decim ) ); Scale_sig( inp_12k8_fx, 3 * L_FRAME / 2, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), Q_inp_12k8 ) ); Q_inp_12k8 = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 ); - hFrontVad->q_mem_decim = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 ); + hFrontVad->q_mem_decim = Q_inp_12k8; + move16(); + hFrontVad->q_buffer_12k8 = Q_inp_12k8; + move16(); analy_lp_ivas_fx( inp_12k8_fx, L_FRAME, L_LOOK_12k8, &res_energy_fx, A_fx, epsP_h, epsP_l, lsp_new_fx, lsp_mid_fx, st->lsp_old1_fx, alw_pitch_lag_12k8, alw_voicing_fx, INT_FS_12k8, 0 /* <-- sec_chan_low_rate */, Q_inp_12k8, Q_r ); diff --git a/lib_enc/ivas_masa_enc.c b/lib_enc/ivas_masa_enc.c index be113f841..0faf06778 100644 --- a/lib_enc/ivas_masa_enc.c +++ b/lib_enc/ivas_masa_enc.c @@ -2659,7 +2659,7 @@ static void combine_freqbands_and_subframes_fx( } surrCohTemp = BASOP_Util_Divide3232_Scale( surrCohSum, L_add( energySum, EPSILON_FX ), &exp_diff ); exp_diff = add( exp_diff, sub( surrCohSum_e, energySum_e ) ); - surrCohTemp = shl( surrCohTemp, exp_diff ); // Q15 + surrCohTemp = shl_sat( surrCohTemp, exp_diff ); // Q15 FOR( j = 0; j < numSf; j++ ) { diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 61678b6af..74a6bc8eb 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -905,6 +905,7 @@ typedef struct front_vad_enc #endif Word16 mem_preemph_fx; /* preemph filter memory */ + Word16 q_mem_preemph_fx; /* preemph filter memory */ NOISE_EST_HANDLE hNoiseEst; /* Noise estimation handle */ VAD_HANDLE hVAD; /* VAD handle */ Word16 *delay_buf_fx; diff --git a/lib_enc/ivas_stereo_td_analysis.c b/lib_enc/ivas_stereo_td_analysis.c index b73712ef7..9abe856a0 100644 --- a/lib_enc/ivas_stereo_td_analysis.c +++ b/lib_enc/ivas_stereo_td_analysis.c @@ -2854,13 +2854,13 @@ Word16 tdm_lp_comparison_fx( exp = -5; move16(); } - sum2_value = L_add( sum2_value, L_shl( 328, sub( sub( 31, exp ), 15 ) ) ); + sum2_value = BASOP_Util_Add_Mant32Exp( sum2_value, exp, 328, Q31 - Q15, &exp ); Word32 temp32_log = L_add( BASOP_Util_Log2( sum2_value ), L_shl( sub( Q31, sub( 31, exp ) ), Q25 ) ); ener_sig_fx = Mpy_32_32( temp32_log, 646456623 ); // Q25 // ener_sig = log10f( sum2_f( speech, L_frame ) + 0.01f ); exp = sub( 31, sub( Q_speech, gb1 ) ); sum2_value = sum2_32_exp_fx( res_fx, L_frame, &exp, gb ); // 2*Q_speech -31-gb - sum2_value = L_add( sum2_value, L_shl( 328, sub( sub( 31, exp ), 15 ) ) ); + sum2_value = BASOP_Util_Add_Mant32Exp( sum2_value, exp, 328, Q31 - Q15, &exp ); temp32_log = L_add( BASOP_Util_Log2( sum2_value ), L_shl( sub( Q31, sub( 31, exp ) ), Q25 ) ); log10_fx = Mpy_32_32( temp32_log, 646456623 ); // Q25 predgain_SCh_fx = Mpy_32_32( L_sub( ener_sig_fx, log10_fx ), 1342177280 ); // Q27+Q25-31 = Q21 @@ -2870,10 +2870,10 @@ Word16 tdm_lp_comparison_fx( /* Find prediction gain when resuing the Primary Channel LP filter */ // residu( A_PCh, m, speech, res, L_frame ); scale_sig32( speech_buff, L_FRAME + M, negate( gb1 ) ); - residu_ivas_fx( A_PCh_fx, Q12, m, speech_fx, res_fx, L_frame ); + residu_ivas_fx( A_PCh_fx, sub( 14, norm_s( A_PCh_fx[0] ) ), m, speech_fx, res_fx, L_frame ); exp = sub( 31, sub( Q_speech, gb1 ) ); sum2_value = sum2_32_exp_fx( res_fx, L_frame, &exp, gb ); // 2*Q_speech -31-gb - sum2_value = L_add( sum2_value, L_shl( 328, sub( sub( 31, exp ), 15 ) ) ); + sum2_value = BASOP_Util_Add_Mant32Exp( sum2_value, exp, 328, Q31 - Q15, &exp ); temp32_log = L_add( BASOP_Util_Log2( sum2_value ), L_shl( sub( Q31, sub( 31, exp ) ), Q25 ) ); log10_fx = Mpy_32_32( temp32_log, 646456623 ); // Q25 diff --git a/lib_enc/multi_harm_fx.c b/lib_enc/multi_harm_fx.c index 862df80e9..43e5ff5bb 100644 --- a/lib_enc/multi_harm_fx.c +++ b/lib_enc/multi_harm_fx.c @@ -134,7 +134,7 @@ Word16 multi_harm_fx( /* o : frame multi-harmonicity /* calculate the new step */ /*step = (Bin_E[*pt_mins] - Bin_E[i]) / (*pt_mins-i);*/ tmp16 = sub( *pt_mins, i ); - tmpdB = sub_sat( Bin_E[*pt_mins], Bin_E[i] ); + tmpdB = sub( Bin_E[*pt_mins], Bin_E[i] ); sign_fx = shr( tmpdB, 15 ); /* 0 if positive else -1 */ ExpdB = sub( norm_s( tmpdB ), 1 ); tmpdB = abs_s( shl( tmpdB, ExpdB ) ); diff --git a/lib_enc/pitch_ol2.c b/lib_enc/pitch_ol2.c index 11c816513..17be8dec6 100644 --- a/lib_enc/pitch_ol2.c +++ b/lib_enc/pitch_ol2.c @@ -310,7 +310,16 @@ void StableHighPitchDetect_ivas_fx( /* short pitch possiblity pre-decision */ maximum_fx( EspecdB, 7, &energy0_16 ); maximum_fx( EspecdB + 8, 7, &energy1_16 ); - ratio = s_max( sub( energy1_16, energy0_16 ), 0 ); /*Q7 */ + test(); + IF( energy1_16 < 0 && energy0_16 > 0 ) + { + ratio = 0; + move16(); + } + ELSE + { + ratio = s_max( sub( energy1_16, energy0_16 ), 0 ); /*Q7 */ + } /*ratio *= max(voicing,0);*/ tmp = s_max( voicing_m, 0 ); ratio = mult_r( ratio, tmp ); /*Q7*/ diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index ffa8e3a4c..74be752ff 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -3168,7 +3168,7 @@ void ivas_smc_mode_selection_fx( { test(); IF( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], 1152 /*4.5f in Q8*/ ) && - ( GT_16( sub( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2] ), 2560 /* 10.0f in Q8 */ ) ) ) + ( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], add( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2], 2560 /* 10.0f in Q8 */ ) ) ) ) { IF( EQ_16( st->tc_cnt, 1 ) ) { -- GitLab From 0db55faf4a1d926db085354acffe5e641b95a9de Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 26 Nov 2024 15:58:54 +0530 Subject: [PATCH 2/3] Fix for crash with STV original --- lib_enc/analy_sp_fx.c | 5 +++-- lib_enc/ivas_core_pre_proc_front.c | 11 ++++++----- lib_enc/nois_est_fx.c | 8 ++++++++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 01a23f0b2..5a29fffa8 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -633,8 +633,9 @@ void ivas_analy_sp_fx_front( move16(); } - Scale_fac[i_subfr] = 0; + Scale_fac[i_subfr] = -1; move16(); + Scale_sig( pt_fft, L_FFT, Scale_fac[i_subfr] ); /* compute the spectrum */ fft_rel_fx( pt_fft, L_FFT, LOG2_L_FFT ); @@ -685,7 +686,7 @@ void ivas_analy_sp_fx_front( move32(); /* tmp = (input[i] + input[i+Len]+0.001f)/2.0f */ // Ltmp = L_max( L_shr( 21474, sub( Q31, add( Q_new, QSCALE ) ) ), L_add( L_shr( Bin_E[i], 1 ), L_shr( Bin_E[i + L_FFT / 2], 1 ) ) ); - Ltmp = L_shr( L_add( Bin_E[i], Bin_E[i + L_FFT / 2] ), 1 ); // average + Ltmp = L_add( L_shr( Bin_E[i], 1 ), L_shr( Bin_E[i + L_FFT / 2], 1 ) ); // average if ( Ltmp == 0 ) { // Ltmp = L_min( L_shr( 21474, sub( Q31, add( Q_new, QSCALE - 2 ) ) ), 1 ); diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 44a01b6fb..3cea45ea4 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -1566,6 +1566,7 @@ ivas_error pre_proc_front_ivas_fx( { Word16 Q_inp_dmx = Q_factor_arrL( hCPE->hStereoDft->DFT[0], STEREO_DFT_N_MAX_ENC ) - 1; floatToFixed_arrL( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], Q_inp_dmx, STEREO_DFT_N_MAX_ENC ); + hCPE->hStereoDft->DFT_fx_e[0] = 31 - Q_inp_dmx; } Word16 Q_new; Word16 Scale_fac[2]; @@ -1594,15 +1595,15 @@ ivas_error pre_proc_front_ivas_fx( } #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arrL( fr_bands_fx, fr_bands, Q_new + QSCALE - 1, 40 ); - fixedToFloat_arrL( band_energies_fx, band_energies, Q_new + QSCALE - 1, 40 ); - fixedToFloat_arrL( lf_E_fx, lf_E, Q_new + QSCALE - 2 - 1, 148 ); - fixedToFloat_arrL( PS_fx, PS, Q_new + QSCALE - 2 - 1, 128 ); + fixedToFloat_arrL( fr_bands_fx, fr_bands, Q_new + QSCALE - 2, 40 ); + fixedToFloat_arrL( band_energies_fx, band_energies, Q_new + QSCALE + 2, 40 ); + fixedToFloat_arrL( lf_E_fx, lf_E, Q_new + QSCALE, 148 ); + fixedToFloat_arrL( PS_fx, PS, Q_new + QSCALE - 2, 128 ); Etot = fixedToFloat( Etot_fx, Q8 ); // fixedToFloat_arr( fft_buff_fx, fft_buff, Q_new + QSCALE + Scale_fac[0] - 14 -1, 512 ); for ( int i_sbfr = 0; i_sbfr < 2; i_sbfr++ ) { - fixedToFloat_arr( fft_buff_fx + i_sbfr * L_FFT, fft_buff + i_sbfr * L_FFT, Q_new + QSCALE + Scale_fac[i_sbfr] - 14 - 1, L_FFT ); + fixedToFloat_arr( fft_buff_fx + i_sbfr * L_FFT, fft_buff + i_sbfr * L_FFT, Q_new + Scale_fac[i_sbfr], L_FFT ); } #endif #endif diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index eb04543a4..ccf16510d 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -2583,6 +2583,10 @@ void noise_est_ivas_fx( /* else */ /* non_sta2 = non_sta2 * ((st_ave_enr2[i]+1) / (enr[i]+1)) */ Lden = L_min( L_tmp_enr, L_tmp_ave_enr ); + if ( Lden == 0 ) + { + Lden = L_add( Lden, EPSILON_FX ); + } ExpNum = sub( norm_l( Lnum ), 1 ); num = extract_h( L_shl( Lnum, ExpNum ) ); @@ -2626,6 +2630,10 @@ void noise_est_ivas_fx( { Lnum = L_max( L_tmp_enr, L_tmp_ave_enr2 ); Lden = L_min( L_tmp_enr, L_tmp_ave_enr2 ); + if ( Lden == 0 ) + { + Lden = L_add( Lden, EPSILON_FX ); + } ExpNum = sub( norm_l( Lnum ), 1 ); num = extract_h( L_shl( Lnum, ExpNum ) ); -- GitLab From 045ab91e3ea6fca7e077bad19cdedd504a6d2d14 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 26 Nov 2024 17:16:29 +0530 Subject: [PATCH 3/3] Fix for +10dB crashes --- lib_enc/analy_sp_fx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 5a29fffa8..c53aa5386 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -647,7 +647,6 @@ void ivas_analy_sp_fx_front( pt_bands += NB_BANDS; pt_fft += L_FFT; } - LEtot = L_shr( LEtot, 2 ); // Q_new + Q_SCALE - 2 } ELSE { @@ -981,7 +980,7 @@ static void ivas_find_enr1( Word16 q_data, /* i : Q of fft result */ Word32 band[], /* o : per band energy Q_new + QSCALE */ Word32 *ptE, /* o : per bin energy for low frequencies Q_new + QSCALE-2 */ - Word32 *LEtot, /* o : total energy Q_new + QSCALE */ + Word32 *LEtot, /* o : total energy Q_new + QSCALE-2 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ const Word16 Q_new, /* i : scaling factor Q0 */ @@ -1116,7 +1115,7 @@ static void ivas_find_enr1( move32(); FOR( i = min_band; i <= max_band; i++ ) { - etot = L_add( etot, band[i] ); // Q_new + QSCALE + etot = L_add( etot, L_shr( band[i], Q2 ) ); // Q_new + QSCALE - 2 } *LEtot = etot; move32(); -- GitLab