From 615535d47f8554c226619596f5b5f5299a8643c4 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 23 Jan 2025 12:53:01 +0100 Subject: [PATCH] port float issue 1211: fix crash in MASA DTX bitrate switching; under NONBE_1211_DTX_BR_SWITCHING --- lib_com/ivas_prot.h | 5 ++++- lib_com/options.h | 2 ++ lib_com/prot.h | 23 +++++++++++++---------- lib_enc/amr_wb_enc.c | 6 ++++++ lib_enc/dtx.c | 29 ++++++++++++++++++++++++++++- lib_enc/ivas_core_pre_proc_front.c | 9 ++++++++- lib_enc/ivas_cpe_enc.c | 8 ++++++-- lib_enc/ivas_front_vad.c | 5 +++++ lib_enc/ivas_ism_enc.c | 4 ++++ lib_enc/ivas_osba_enc.c | 1 - lib_enc/ivas_sce_enc.c | 4 ++++ lib_enc/pre_proc.c | 22 ++++++++++++++-------- 12 files changed, 94 insertions(+), 24 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index e39433fec..496471f2b 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -199,7 +199,10 @@ ivas_error pre_proc_front_ivas( const int16_t force_front_vad, /* i : flag to force VAD decision */ const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/ const IVAS_FORMAT ivas_format, /* i : IVAS format */ - const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ + const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ +#ifdef NONBE_1211_DTX_BR_SWITCHING + const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */ +#endif const int32_t ivas_total_brate /* i : IVAS total bitrate */ #ifdef DEBUG_MODE_INFO , const int16_t ch_idx diff --git a/lib_com/options.h b/lib_com/options.h index a23a28b65..03c171f58 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -172,6 +172,8 @@ #define NONBE_1233_HQ_CLASSIFIER_DIV_BY_ZERO /* Eri: issue 1233: Address possible division by zero in hf_spectrum_sparseness() */ #define NONE_BE_FIX_BASOP_1044_OSBA_PRERENDER_MIX_GAINS /* DLB: adjust prerendering and mixing gain in OSBA encoder. This is fix to float codes*/ +#define NONBE_1211_DTX_BR_SWITCHING /* VA: port float issue 1211: fix crash in MASA DTX bitrate switching */ + /* #################### End FIXES switches ############################ */ #define BASOP_NOGLOB /* Disable global symbols in BASOPs, Overflow/Carry in BASOPs disabled, additional BASOPs in case of Overflow */ diff --git a/lib_com/prot.h b/lib_com/prot.h index f4114fd9a..8d8b05b2f 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -2301,14 +2301,14 @@ void amr_wb_enc_init( ); void pre_proc( - Encoder_State *st, /* i/o: encoder state structure */ - const int16_t input_frame, /* i : frame length */ - float old_inp_12k8[], /* i/o: buffer of old input signal */ - float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ - float **inp, /* o : ptr. to inp. signal in the current frame*/ - float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *ener, /* o : residual energy from Levinson-Durbin */ - int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ + Encoder_State *st, /* i/o: encoder state structure */ + const int16_t input_frame, /* i : frame length */ + float old_inp_12k8[], /* i/o: buffer of old input signal */ + float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ + float **inp, /* o : ptr. to inp. signal in the current frame*/ + float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ + float *ener, /* o : residual energy from Levinson-Durbin */ + int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */ float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */ float epsP[M + 1], /* i/o: LP prediction errors */ @@ -3902,8 +3902,11 @@ void td_cng_enc_init( ); void dtx( - Encoder_State *st, /* i/o: encoder state structure */ - const int32_t ivas_total_brate, /* i : IVAS total bitrate */ + Encoder_State *st, /* i/o: encoder state structure */ +#ifdef NONBE_1211_DTX_BR_SWITCHING + const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */ +#endif + const int32_t ivas_total_brate, /* i : IVAS total bitrate */ const int16_t vad, /* i : VAD flag for DTX */ const float speech[] /* i : Pointer to the speech frame */ ); diff --git a/lib_enc/amr_wb_enc.c b/lib_enc/amr_wb_enc.c index e921698e5..010917925 100644 --- a/lib_enc/amr_wb_enc.c +++ b/lib_enc/amr_wb_enc.c @@ -310,7 +310,13 @@ void amr_wb_enc( { st->fd_cng_reset_flag = 0; } + +#ifdef NONBE_1211_DTX_BR_SWITCHING + dtx( st, -1, -1, vad_flag_dtx, inp ); +#else dtx( st, -1, vad_flag_dtx, inp ); +#endif + /*----------------------------------------------------------------* * Noise energy down-ward update and total noise energy estimation * Long-term energies and relative frame energy updates diff --git a/lib_enc/dtx.c b/lib_enc/dtx.c index 606c8aaaf..c1e842e58 100644 --- a/lib_enc/dtx.c +++ b/lib_enc/dtx.c @@ -78,7 +78,10 @@ static void update_SID_cnt( DTX_ENC_HANDLE hDtxEnc, const int32_t core_brate, co *-------------------------------------------------------------------*/ void dtx( - Encoder_State *st, /* i/o: encoder state structure */ + Encoder_State *st, /* i/o: encoder state structure */ +#ifdef NONBE_1211_DTX_BR_SWITCHING + const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */ +#endif const int32_t ivas_total_brate, /* i : IVAS total bitrate */ const int16_t vad, /* i : VAD flag for DTX */ const float speech[] /* i : Pointer to the speech frame */ @@ -87,17 +90,31 @@ void dtx( float alpha; DTX_ENC_HANDLE hDtxEnc = st->hDtxEnc; int16_t last_br_cng_flag, last_br_flag, br_dtx_flag; +#ifdef NONBE_1211_DTX_BR_SWITCHING + int32_t total_brate_ref; + + total_brate_ref = st->total_brate; +#endif + if ( st->dtx_sce_sba != 0 ) { last_br_cng_flag = 1; last_br_flag = 1; +#ifndef NONBE_1211_DTX_BR_SWITCHING br_dtx_flag = 1; +#endif } else { last_br_cng_flag = st->last_total_brate_cng <= MAX_BRATE_DTX_EVS || st->lp_noise < 15 || ( st->element_mode == IVAS_SCE && st->last_total_brate_cng <= MAX_BRATE_DTX_IVAS ); +#ifdef NONBE_1211_DTX_BR_SWITCHING + last_br_flag = ( st->element_mode == EVS_MONO && st->last_total_brate <= MAX_BRATE_DTX_EVS ) || + ( st->element_mode != EVS_MONO && last_ivas_total_brate <= MAX_BRATE_DTX_IVAS ) || + st->lp_noise < 15; +#else last_br_flag = st->last_total_brate <= MAX_BRATE_DTX_EVS || st->lp_noise < 15 || ( st->element_mode == IVAS_SCE && st->last_total_brate <= MAX_BRATE_DTX_IVAS ); br_dtx_flag = 0; +#endif } /* Initialization */ @@ -127,6 +144,7 @@ void dtx( last_br_cng_flag ) { st->total_brate = st->last_total_brate_cng; + if ( !( st->total_brate == ACELP_7k20 && st->Opt_SC_VBR ) ) { st->Opt_SC_VBR = 0; @@ -170,6 +188,9 @@ void dtx( * Select SID or FRAME_NO_DATA frame if DTX is enabled *------------------------------------------------------------------------*/ +#ifdef NONBE_1211_DTX_BR_SWITCHING + br_dtx_flag = 1; +#endif if ( st->dtx_sce_sba == 0 ) { br_dtx_flag = ( st->element_mode == EVS_MONO && st->total_brate <= MAX_BRATE_DTX_EVS ) || @@ -258,6 +279,12 @@ void dtx( reset_indices_enc( st->hBstr, st->hBstr->nb_ind_tot ); } } +#ifdef NONBE_1211_DTX_BR_SWITCHING + else if ( st->element_mode != EVS_MONO ) + { + st->total_brate = total_brate_ref; + } +#endif /*------------------------------------------------------------------------* * Reset counters when in active frame (neither SID nor FRAME_NO_DATA frame) diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 79003e297..3b8b2a448 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -112,7 +112,10 @@ ivas_error pre_proc_front_ivas( const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/ const IVAS_FORMAT ivas_format, /* i : IVAS format */ const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ - const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */ +#ifdef NONBE_1211_DTX_BR_SWITCHING + const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */ +#endif + const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */ #ifdef DEBUG_MODE_INFO , const int16_t ch_idx @@ -559,7 +562,11 @@ ivas_error pre_proc_front_ivas( st->cng_type = LP_CNG; } +#ifdef NONBE_1211_DTX_BR_SWITCHING + dtx( st, last_ivas_total_brate, ivas_total_brate, *vad_flag_dtx, inp_12k8 ); +#else dtx( st, ivas_total_brate, *vad_flag_dtx, inp_12k8 ); +#endif if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 ) { diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index c0fd2918c..66fb8bfc2 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -360,11 +360,11 @@ ivas_error ivas_cpe_enc( hCPE->hStereoMdct->fDualMono = 0; hCPE->hStereoMdct->fMSstereo = 0; - if ( hCPE->hStereoMdct->mdct_stereo_mode_cmdl == SMDCT_FORCE_LR ) + if ( hCPE->hStereoMdct->mdct_stereo_mode_cmdl == SMDCT_FORCE_LR ) { hCPE->hStereoMdct->fDualMono = 1; } - else if ( hCPE->hStereoMdct->mdct_stereo_mode_cmdl == SMDCT_FORCE_MS ) + else if ( hCPE->hStereoMdct->mdct_stereo_mode_cmdl == SMDCT_FORCE_MS ) { hCPE->hStereoMdct->fMSstereo = 1; } @@ -485,7 +485,11 @@ ivas_error ivas_cpe_enc( error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], +#ifdef NONBE_1211_DTX_BR_SWITCHING + fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, st_ivas->hEncoderConfig->last_ivas_total_brate, ivas_total_brate +#else fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, ivas_total_brate +#endif #ifdef DEBUG_MODE_INFO , ( st_ivas->nSCE + ( cpe_id * CPE_CHANNELS ) + n ) diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 52c3b8cd4..49268e523 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -423,7 +423,12 @@ ivas_error front_vad_spar( noise_est_down( fr_bands[0], hFrontVad->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &hFrontVad->hNoiseEst->totalNoise, Etot[0], &hFrontVad->hNoiseEst->Etot_last, &hFrontVad->hNoiseEst->Etot_v_h2 ); corr_shift = correlation_shift( hFrontVad->hNoiseEst->totalNoise ); +#ifdef NONBE_1211_DTX_BR_SWITCHING + dtx( st, hEncoderConfig->last_ivas_total_brate, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8 ); +#else dtx( st, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8 ); +#endif + /* linear prediction analysis */ alw_pitch_lag_12k8[0] = st->old_pitch_la; alw_pitch_lag_12k8[1] = st->old_pitch_la; diff --git a/lib_enc/ivas_ism_enc.c b/lib_enc/ivas_ism_enc.c index bc7ced57f..1502b523b 100644 --- a/lib_enc/ivas_ism_enc.c +++ b/lib_enc/ivas_ism_enc.c @@ -175,7 +175,11 @@ ivas_error ivas_ism_enc( error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], +#ifdef NONBE_1211_DTX_BR_SWITCHING + fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, ISM_FORMAT, 0, st_ivas->hEncoderConfig->last_ivas_total_brate, st_ivas->hEncoderConfig->ivas_total_brate +#else fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, ISM_FORMAT, 0, st_ivas->hEncoderConfig->ivas_total_brate +#endif #ifdef DEBUG_MODE_INFO , st->id_element diff --git a/lib_enc/ivas_osba_enc.c b/lib_enc/ivas_osba_enc.c index dffebcf19..bd0d6b1b5 100644 --- a/lib_enc/ivas_osba_enc.c +++ b/lib_enc/ivas_osba_enc.c @@ -447,7 +447,6 @@ static void ivas_osba_render_ism_to_sba( int16_t azimuth, elevation; float gains[MAX_INPUT_CHANNELS]; float g1, g2; - float output_gain; int16_t nchan_sba; diff --git a/lib_enc/ivas_sce_enc.c b/lib_enc/ivas_sce_enc.c index a1d6d5945..1db9b2b17 100644 --- a/lib_enc/ivas_sce_enc.c +++ b/lib_enc/ivas_sce_enc.c @@ -183,7 +183,11 @@ ivas_error ivas_sce_enc( &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, +#ifdef NONBE_1211_DTX_BR_SWITCHING + st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, ivas_format, 0, st_ivas->hEncoderConfig->last_ivas_total_brate, st_ivas->hEncoderConfig->ivas_total_brate +#else st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, ivas_format, 0, st_ivas->hEncoderConfig->ivas_total_brate +#endif #ifdef DEBUG_MODE_INFO , st->id_element diff --git a/lib_enc/pre_proc.c b/lib_enc/pre_proc.c index d6ddbc2b6..32de5ec30 100644 --- a/lib_enc/pre_proc.c +++ b/lib_enc/pre_proc.c @@ -53,14 +53,14 @@ *--------------------------------------------------------------------*/ void pre_proc( - Encoder_State *st, /* i/o: encoder state structure */ - const int16_t input_frame, /* i : frame length */ - float old_inp_12k8[], /* i/o: buffer of old input signal */ - float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ - float **inp, /* o : ptr. to inp. signal in the current frame*/ - float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *ener, /* o : residual energy from Levinson-Durbin */ - int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ + Encoder_State *st, /* i/o: encoder state structure */ + const int16_t input_frame, /* i : frame length */ + float old_inp_12k8[], /* i/o: buffer of old input signal */ + float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ + float **inp, /* o : ptr. to inp. signal in the current frame*/ + float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ + float *ener, /* o : residual energy from Levinson-Durbin */ + int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */ float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */ float epsP[M + 1], /* i/o: LP prediction errors */ @@ -250,7 +250,13 @@ void pre_proc( /*-----------------------------------------------------------------* * Select SID or FRAME_NO_DATA frame if DTX enabled *-----------------------------------------------------------------*/ + +#ifdef NONBE_1211_DTX_BR_SWITCHING + dtx( st, -1, -1, vad_flag_dtx, inp_12k8 ); +#else dtx( st, -1, vad_flag_dtx, inp_12k8 ); +#endif + /*----------------------------------------------------------------* * Adjust FD-CNG Noise Estimator *----------------------------------------------------------------*/ -- GitLab