diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 3d66723ecbcee9509c0913325a6dcc30a6f17e23..47ce9b8710e9d246bf21389e9e5e1aff82ffd2e4 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -1018,6 +1018,13 @@ typedef enum #define DIRAC_SLOT_NS 1250000L /* time duration of a time slot, 1.25ms (==DELAY_RENERER_NS/MAX_PARAM_SPATIAL_SUBFRAMES) */ #define DIRAC_SLOT_ENC_NS 5000000L +#define DIRAC_MONO_THRESH_SILENCE 3e4f +#define DIRAC_MONO_NORM_FACTOR 1e13f +#define DIRAC_MONO_ONE_ON_NORM_FACTOR (1.f / 1e13f) +#define DIRAC_MONO_MAX_THRESH 1e6f +#define DIRAC_MONO_MIN_THRESH 1e2f +#define DIRAC_MONO_FRAME_THRESH 15 /* 30ms */ + typedef enum { DIRAC_OPEN, /* initialize to default value */ diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 46284eb325f30e6bfb818232a26118031c55a055..d6efef32df9a50950a6b6ccf6af083252d175b88 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -3433,6 +3433,11 @@ void ivas_dirac_param_est_enc( const IVAS_FORMAT ivas_format , const int16_t hodirac_flag, const int16_t nchan_fb_in +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif ); @@ -4385,6 +4390,9 @@ ivas_error ivas_spar_md_enc_process( const int16_t nchan_inp, const int16_t sba_order, /* i : Ambisonic (SBA) order */ float *prior_mixer[IVAS_MAX_FB_MIXER_OUT_CH][IVAS_MAX_SPAR_FB_MIXER_IN_CH] /* i : prior mixer_matrix */ +#ifdef FIX_527_SBA_MONO_INPUT + ,const int16_t dirac_mono_flag +#endif ); void ivas_compute_spar_params( @@ -4524,6 +4532,11 @@ void ivas_spar_update_md_hist( ivas_spar_md_dec_state_t *hMdDec /* i/o: SPAR MD decoder handle */ ); +int16_t ivas_spar_chk_zero_coefs( + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +); + void ivas_spar_smooth_md_dtx( ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ const int16_t num_bands_out, /* i : number of output bands */ @@ -5527,6 +5540,11 @@ void computeReferencePower_enc( const IVAS_FORMAT ivas_format, /* i : ivas_format */ int16_t ref_power_w, /* i : use 0 if hodirac is enabled */ const int16_t nchan_ana /* i : number of analysis channels */ +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif ); ivas_error ivas_mono_dmx_renderer_open( diff --git a/lib_com/ivas_qmetadata_com.c b/lib_com/ivas_qmetadata_com.c index 32b4653d2f4c40a1dc9befe955c442ce71241a23..022f4efdf4c986db0df370f24d1b8f9f137b469e 100644 --- a/lib_com/ivas_qmetadata_com.c +++ b/lib_com/ivas_qmetadata_com.c @@ -149,6 +149,9 @@ ivas_error ivas_qmetadata_allocate_memory( { set_zero( hQMetaData->q_direction[dir].band_data[j].elevation, MAX_PARAM_SPATIAL_SUBFRAMES ); set_zero( hQMetaData->q_direction[dir].band_data[j].azimuth, MAX_PARAM_SPATIAL_SUBFRAMES ); +#ifdef FIX_527_SBA_MONO_INPUT + set_zero( hQMetaData->q_direction[dir].band_data[j].energy_ratio, MAX_PARAM_SPATIAL_SUBFRAMES ); +#endif } } diff --git a/lib_com/ivas_stat_com.h b/lib_com/ivas_stat_com.h index 08359b7a4af4f5702a4ebdadd0652101b009f827..04a8c5ed4b7200b25a3b8b9fb682515b166723a3 100644 --- a/lib_com/ivas_stat_com.h +++ b/lib_com/ivas_stat_com.h @@ -558,6 +558,9 @@ typedef struct ivas_masa_qmetadata_frame_struct int16_t metadata_max_bits; /* maximum allowed number of bits for metadata per frame */ uint8_t useLowerRes; uint8_t useLowerBandRes; +#ifdef FIX_527_SBA_MONO_INPUT + int16_t dirac_mono_flag; +#endif IVAS_SURROUND_COHERENCE_BAND_DATA *surcoh_band_data; /* Additional helper values to include all data required for writing to output file */ diff --git a/lib_com/options.h b/lib_com/options.h index e8eda32065d915b0a5c16861307a23f308943321..26a75598c99f3f47db0653b567def4c31d14797c 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -222,6 +222,8 @@ #define FIX_647_SILENT_W_PARAMBIN /* Nokia: Issue #647: Fix silent W SH inputs in parametric binauralizer */ #define MASA_AND_OBJECTS /* Nokia: Combination of MASA and objects */ + +#define FIX_527_SBA_MONO_INPUT /* Dlb : Fix for mono content in a HOA input format */ #define FIX_653_BUG_IN_SKIP_MATRIX /* Dlb: fix for issue #653, bug in the ivas_spar_get_skip_mat function*/ #define FIX_663_PARAM_ISM_EXT /* FhG: Issue 663: ParamISM EXT output improvement */ #define FIX_673_OMASA_OBJ_MD_SYNC /* Nokia: Fix issue 673 by updating metadata in the third subframe to account for audio delay. */ diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index ec363399e417f67e34c6dbd4ac969b04317976ed..62af38c63c0026c99d3055cf51a6bf702f8797f7 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -730,6 +730,63 @@ static ivas_error ivas_spar_set_dec_config( return IVAS_ERR_OK; } +/*-----------------------------------------------------------------------------------------* + * Function ivas_dec_mono_sba_handling() + * + * + *-----------------------------------------------------------------------------------------*/ + +static void ivas_dec_mono_sba_handling( + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +) +{ + int16_t mono_flag, b, block; + + mono_flag = 1; + + for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ ) + { + for ( block = 0; block < MAX_PARAM_SPATIAL_SUBFRAMES; ++block ) + { + float azimuth = st_ivas->hQMetaData->q_direction[0].band_data[b].azimuth[block]; + float elevation = st_ivas->hQMetaData->q_direction[0].band_data[b].azimuth[block]; + float energy_ratio = st_ivas->hQMetaData->q_direction[0].band_data[0].energy_ratio[block]; + if ( + ( azimuth != 0.0f ) || + ( elevation != 0.0f ) || + ( energy_ratio > 0.15f ) ) /* 0.15f is just above the lowest quantised value. */ + { + mono_flag = 0; + } + } + } + /* Combine the SPAR prediction coefs flag with the azimuth, elevation and energy ratio flag.*/ + mono_flag = mono_flag && ivas_spar_chk_zero_coefs( st_ivas, sba_order ); + + if ( mono_flag ) + { + /* Set Energy Ratio values to be zero */ + for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ ) + { + set_zero( st_ivas->hQMetaData->q_direction[0].band_data[b].energy_ratio, MAX_PARAM_SPATIAL_SUBFRAMES ); + } + if ( st_ivas->hDirAC != NULL ) + { + for ( block = 0; block < st_ivas->hSpatParamRendCom->dirac_md_buffer_length; ++block ) + { + /* Set directional Energy Ratio values to be zero */ + set_zero( st_ivas->hSpatParamRendCom->energy_ratio1[block], st_ivas->hSpatParamRendCom->num_freq_bands ); + if ( st_ivas->hQMetaData->no_directions == 2 ) + { + set_zero( st_ivas->hSpatParamRendCom->energy_ratio2[block], st_ivas->hSpatParamRendCom->num_freq_bands ); + } + /* Set Diffuseness values to be 1.0 */ + set_f( st_ivas->hSpatParamRendCom->diffuseness_vector[block], 1.0f, st_ivas->hSpatParamRendCom->num_freq_bands ); + } + } + } +} /*-----------------------------------------------------------------------------------------* * Function ivas_spar_md_dec_process() @@ -769,6 +826,10 @@ void ivas_spar_md_dec_process( #endif st_ivas->hQMetaData->sba_inactive_mode, st_ivas->last_active_ivas_total_brate ); +#ifdef FIX_527_SBA_MONO_INPUT + ivas_dec_mono_sba_handling( st_ivas, sba_order ); +#endif + #if 0 { char f_name[100]; @@ -1095,7 +1156,66 @@ void ivas_spar_md_dec_process( return; } +/*-----------------------------------------------------------------------------------------* + * Function ivas_spar_chk_zero_coefs() + * + * Check for zeroed SPAR coefficients + *-----------------------------------------------------------------------------------------*/ +int16_t ivas_spar_chk_zero_coefs( + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +) +{ + int16_t j, k, b, i_ts; + ivas_spar_md_dec_state_t *hMdDec; + int16_t num_md_sub_frames; + int16_t mono = 1; + int16_t ndec, ndm; + + hMdDec = st_ivas->hSpar->hMdDec; + num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( sba_order, st_ivas->hDecoderConfig->ivas_total_brate +#ifdef VLBR_20MS_MD + , + st_ivas->last_active_ivas_total_brate +#endif + ); + + ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; + + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + { + for ( b = 0; b < min( hMdDec->spar_md.num_bands, SPAR_DIRAC_SPLIT_START_BAND ); b++ ) + { + for ( j = 0; j < ndm + ndec - 1; j++ ) + { + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] != 0.0f ) + { + mono = 0; + } + } + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re[j][k] != 0.0f ) + { + mono = 0; + } + } + } + for ( j = 0; j < ndec; j++ ) + { + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re[j] != 0.0f ) + { + mono = 0; + } + } + } + } + return mono; +} /*-----------------------------------------------------------------------------------------* * Function ivas_spar_smooth_md_dtx() diff --git a/lib_enc/ivas_dirac_enc.c b/lib_enc/ivas_dirac_enc.c index 1a25541d313e2caf29de099c1b5e3c83fb594dee..9f4b3b76943aa7487a95eb3cff2fb1052fa425f7 100644 --- a/lib_enc/ivas_dirac_enc.c +++ b/lib_enc/ivas_dirac_enc.c @@ -159,6 +159,7 @@ ivas_error ivas_dirac_enc_open( } hDirAC->index_buffer_intensity = 0; + hDirAC->mono_frame_count = 0; st_ivas->hDirAC = hDirAC; st_ivas->hSpar->enc_param_start_band = st_ivas->hDirAC->hConfig->enc_param_start_band; @@ -299,7 +300,13 @@ void ivas_dirac_enc( int16_t i, j, b, i_ts; push_wmops( "ivas_dirac_enc" ); - ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS ); + ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + &( hDirAC->mono_frame_count ), + &( hQMetaData->dirac_mono_flag ) +#endif + ); if ( hQMetaData->q_direction->cfg.nbands > 0 ) { @@ -310,6 +317,22 @@ void ivas_dirac_enc( /* WB 4TC mode bit : disable for now*/ push_next_indice( hMetaData, 0, 1 ); +#ifdef FIX_527_SBA_MONO_INPUT + /* Set Energy Ratio to 0.0 if the mono flag is set, before the metadata is encoded */ + if ( hQMetaData->dirac_mono_flag ) + { + for ( b = hQMetaData->q_direction->cfg.start_band; b < hQMetaData->q_direction->cfg.nbands; b++ ) + { + for ( i_ts = 0; i_ts < ( ( dtx_vad == 1 ) ? hQMetaData->q_direction[0].cfg.nblocks : 1 ); i_ts++ ) + { + hQMetaData->q_direction[0].band_data[b].energy_ratio[i_ts] = 0.0f; + hQMetaData->q_direction[0].band_data[b].azimuth[i_ts] = 0.0f; + hQMetaData->q_direction[0].band_data[b].elevation[i_ts] = 0.0f; + } + } + } +#endif + ivas_qmetadata_enc_encode( hMetaData, hQMetaData, hodirac_flag ); } else @@ -388,6 +411,118 @@ void ivas_dirac_enc( return; } +/*------------------------------------------------------------------------- + * ivas_dirac_get_mono_flag() + * + * + *-------------------------------------------------------------------------*/ + +static int16_t ivas_dirac_get_mono_flag( + const int16_t *band_grouping, /* i : Band grouping for estimation */ + float Cldfb_RealBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : Real part of input signal */ + float Cldfb_ImagBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : Imag part of input signal */ + const int16_t nchan_ana, /* i : number of analysis channels */ + int16_t *mono_frame_count ) /* i/o : current number of mono frames count */ +{ + int16_t brange[2]; + int16_t i, j, ch_idx; + float other_ch_band_power; + float W_band_power; + int16_t any_mc_band = 0; + int16_t any_mono_band = 0; + int16_t local_mono_flag = 0; + float W_band_power_norm; + float threshold = 0; + + /* Banded Power Calculations */ + for ( i = 0; i < DIRAC_MAX_NBANDS; i++ ) + { + W_band_power = 0; + other_ch_band_power = 0; + + brange[0] = band_grouping[i]; + brange[1] = band_grouping[i + 1]; + + /* Loop over the W channel bins to calculate the power in the band */ + for ( j = brange[0]; j < brange[1]; j++ ) + { + W_band_power += ( Cldfb_RealBuffer[0][j] * Cldfb_RealBuffer[0][j] ) + ( Cldfb_ImagBuffer[0][j] * Cldfb_ImagBuffer[0][j] ); + } + + /* Loop over the other channels and bins to calculate the power in the band */ + for ( ch_idx = 1; ch_idx < nchan_ana; ch_idx++ ) + { + /* abs()^2 */ + for ( j = brange[0]; j < brange[1]; j++ ) + { + other_ch_band_power += ( Cldfb_RealBuffer[ch_idx][j] * Cldfb_RealBuffer[ch_idx][j] ) + ( Cldfb_ImagBuffer[ch_idx][j] * Cldfb_ImagBuffer[ch_idx][j] ); + } + } + if ( other_ch_band_power < EPSILON ) + { + if ( W_band_power > DIRAC_MONO_THRESH_SILENCE ) + { + any_mono_band = 1; + } + } + else + { + if ( ( W_band_power > DIRAC_MONO_THRESH_SILENCE ) || ( other_ch_band_power > DIRAC_MONO_THRESH_SILENCE ) ) + { + W_band_power_norm = min( W_band_power, DIRAC_MONO_NORM_FACTOR ) * DIRAC_MONO_ONE_ON_NORM_FACTOR; + threshold = max( W_band_power_norm * DIRAC_MONO_MAX_THRESH, DIRAC_MONO_MIN_THRESH ); + if ( W_band_power / other_ch_band_power > threshold ) + { + any_mono_band = 1; + } + else + { + any_mc_band = 1; + } + } + } + } + /* If any band contains multi-channel content it's not mono */ + if ( any_mc_band ) + { + local_mono_flag = 0; + } + else + { + /* If any band contains mono content the frame is mono. */ + if ( any_mono_band ) + { + local_mono_flag = 1; + } + } + /* Hysteresis - only after DIRAC_MONO_FRAME_THRESH frames on mono will the actual mono flag be set */ + if ( local_mono_flag ) + { + if ( *mono_frame_count < DIRAC_MONO_FRAME_THRESH ) + { + ( *mono_frame_count )++; + } + } + else + { + /* Instantaneously disable actual mono flag if multi-channel content is observed */ + if ( any_mc_band ) + { + *mono_frame_count = 0; + } + } + + /* Final check if there has been mono for DIRAC_MONO_FRAME_THRESH number of frames than the content is declared mono */ + if ( *mono_frame_count == DIRAC_MONO_FRAME_THRESH ) + { + return 1; + } + else + { + return 0; + } +} + /*------------------------------------------------------------------------- * computeReferencePower_enc() * @@ -401,9 +536,14 @@ void computeReferencePower_enc( float *reference_power, /* o : Estimated power */ const int16_t enc_param_start_band, /* i : first band to process */ const int16_t num_freq_bands, /* i : Number of frequency bands */ - const IVAS_FORMAT ivas_format, /* i : ivas_format */ + const IVAS_FORMAT ivas_format, /* i : ivas_format */ int16_t ref_power_w, /* i : use 0 if hodirac is enabled */ const int16_t nchan_ana /* i : number of analysis channels */ +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, /* i/o: Mono Frame Count */ + int16_t *dirac_mono_flag /* i/o: Mono Flag */ +#endif ) { int16_t brange[2]; @@ -411,10 +551,18 @@ void computeReferencePower_enc( float reference_power_W[DIRAC_MAX_NBANDS]; +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag != NULL ) + { + *dirac_mono_flag = ivas_dirac_get_mono_flag( band_grouping, Cldfb_RealBuffer, Cldfb_ImagBuffer, nchan_ana, mono_frame_count ); + } +#endif + for ( i = 0; i < num_freq_bands; i++ ) { brange[0] = band_grouping[i + enc_param_start_band]; brange[1] = band_grouping[i + enc_param_start_band + 1]; + reference_power[i] = 0; reference_power_W[i] = 0; @@ -446,7 +594,6 @@ void computeReferencePower_enc( return; } - /*------------------------------------------------------------------------- * ivas_dirac_param_est_enc() * @@ -463,7 +610,13 @@ void ivas_dirac_param_est_enc( const int16_t input_frame, const IVAS_FORMAT ivas_format, const int16_t hodirac_flag, - const int16_t nchan_fb_in ) + const int16_t nchan_fb_in +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif +) { int16_t i, d, ts, index, l_ts, num_freq_bands; int16_t band_m_idx, block_m_idx; @@ -571,7 +724,13 @@ void ivas_dirac_param_est_enc( num_freq_bands, ivas_format, hodirac_flag ? 0 : 1, - FOA_CHANNELS ); + FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + mono_frame_count, + dirac_mono_flag +#endif + ); computeIntensityVector_enc( hDirAC, diff --git a/lib_enc/ivas_mcmasa_enc.c b/lib_enc/ivas_mcmasa_enc.c index 9dbdbcab15b3f7de1c47813293cc4ef21573ec4b..2088fa7dced6d935dc4c09eede50c3caa9c6ddcc 100644 --- a/lib_enc/ivas_mcmasa_enc.c +++ b/lib_enc/ivas_mcmasa_enc.c @@ -979,7 +979,13 @@ void ivas_mcmasa_param_est_enc( num_freq_bands, MC_FORMAT, 0, - FOA_CHANNELS ); + FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + NULL, + NULL +#endif + ); /* Fill buffers of length "averaging_length" time slots for intensity and energy */ hMcMasa->index_buffer_intensity = ( hMcMasa->index_buffer_intensity % hMcMasa->no_col_avg_diff ) + 1; /* averaging_length = 32 */ diff --git a/lib_enc/ivas_spar_encoder.c b/lib_enc/ivas_spar_encoder.c index b7b54809521db52bc2bd556994ce5caa61c379ab..516104c79b8ef60eb0582b094e1fe08c27d48367 100644 --- a/lib_enc/ivas_spar_encoder.c +++ b/lib_enc/ivas_spar_encoder.c @@ -451,7 +451,12 @@ static ivas_error ivas_spar_cov_md_process( if ( hSpar->hMdEnc->spar_hoa_md_flag == 0 ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } if ( hSpar->hMdEnc->spar_hoa_dirac2spar_md_flag ) @@ -499,7 +504,12 @@ static ivas_error ivas_spar_cov_md_process( if ( hSpar->hMdEnc->spar_hoa_md_flag ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } return error; @@ -542,7 +552,7 @@ static ivas_error ivas_spar_enc_process( const int16_t *order; SPAR_ENC_HANDLE hSpar = st_ivas->hSpar; IVAS_QMETADATA_HANDLE hQMetaData = st_ivas->hQMetaData; - int16_t ts, l_ts, num_del_samples; + int16_t ts, l_ts, num_del_samples, b, i_ts; float *ppIn_FR_real[IVAS_SPAR_MAX_CH], *ppIn_FR_imag[IVAS_SPAR_MAX_CH]; float wyzx_del_buf[FOA_CHANNELS][IVAS_FB_1MS_48K_SAMP]; @@ -601,7 +611,7 @@ static ivas_error ivas_spar_enc_process( /* fill delay (1 ms) buffer for all Transport channels */ for ( i = 0; i < hSpar->hFbMixer->fb_cfg->num_out_chans; i++ ) { - int idx = hSpar->hFbMixer->fb_cfg->remix_order[i]; + int16_t idx = hSpar->hFbMixer->fb_cfg->remix_order[i]; mvr2r( &hSpar->hFbMixer->ppFilterbank_prior_input[idx][hSpar->hFbMixer->fb_cfg->prior_input_length - num_del_samples], wyzx_del_buf[idx], num_del_samples ); } } @@ -660,6 +670,19 @@ static ivas_error ivas_spar_enc_process( ivas_dirac_enc( st_ivas->hDirAC, hQMetaData, hMetaData, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, dtx_vad, hEncoderConfig->ivas_format, hodirac_flag ); +#ifdef FIX_527_SBA_MONO_INPUT + /* Set Energy Ratio to 0.0 if the mono flag has been set */ + if ( hQMetaData->dirac_mono_flag ) + { + for ( b = hQMetaData->q_direction->cfg.start_band; b < hQMetaData->q_direction->cfg.nbands; b++ ) + { + for ( i_ts = 0; i_ts < ( ( dtx_vad == 1 ) ? hQMetaData->q_direction[0].cfg.nblocks : 1 ); i_ts++ ) + { + hQMetaData->q_direction[0].band_data[b].energy_ratio[i_ts] = 0.0f; + } + } + } +#endif #ifdef COVARIANCE_MEMORY_OPT /*-----------------------------------------------------------------------------------------* @@ -716,7 +739,12 @@ static ivas_error ivas_spar_enc_process( if ( hSpar->hMdEnc->spar_hoa_md_flag == 0 ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } if ( hSpar->hMdEnc->spar_hoa_dirac2spar_md_flag ) @@ -764,7 +792,12 @@ static ivas_error ivas_spar_enc_process( if ( hSpar->hMdEnc->spar_hoa_md_flag ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } #endif diff --git a/lib_enc/ivas_spar_md_enc.c b/lib_enc/ivas_spar_md_enc.c index e07f51e86c62ad05e951a95725030fa16fe5ce44..4e8556cd3789ea7e79d0140dbb5ca609939f0a6e 100644 --- a/lib_enc/ivas_spar_md_enc.c +++ b/lib_enc/ivas_spar_md_enc.c @@ -567,6 +567,10 @@ ivas_error ivas_spar_md_enc_process( const int16_t nchan_inp, const int16_t sba_order, /* i : Ambisonic (SBA) order */ float *prior_mixer[IVAS_MAX_FB_MIXER_OUT_CH][IVAS_MAX_SPAR_FB_MIXER_IN_CH] /* i : prior mixer_matrix */ +#ifdef FIX_527_SBA_MONO_INPUT + , + const int16_t dirac_mono_flag +#endif ) { float pred_coeffs_re[IVAS_SPAR_MAX_CH - 1][IVAS_MAX_NUM_BANDS]; @@ -690,6 +694,40 @@ ivas_error ivas_spar_md_enc_process( active_w_vlbr, &hMdEnc->spar_md_cfg, &hMdEnc->spar_md, Wscale, 0 ); +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag ) + { + int16_t i_ts; + int16_t num_md_sub_frames = 1; + + ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + { + for ( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) + { + for ( j = 0; j < ndm + ndec - 1; j++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] = 0.0f; + } + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re[j][k] = 0.0f; + } + } + + for ( j = 0; j < ndec; j++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re[j] = 0.0f; + } + } + } + } +#endif + for ( i = 0; i < num_ch; i++ ) { for ( j = 0; j < num_ch; j++ ) @@ -891,6 +929,20 @@ ivas_error ivas_spar_md_enc_process( planarCP #endif ); +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag ) + { + ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + hMdEnc->spar_md.band_coeffs[b].C_re[j][k] = 0.0f; + } + } + } +#endif #ifdef SPAR_HOA_DBG /*fprintf(stderr, "\n\n C coefficients: band %d\n", b); diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index d5aec62a0d3763d7437bca78947f090c1a9b2366..e9f7d34736702c0d200331c83be5a3eaaa854d42 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -612,6 +612,8 @@ typedef struct ivas_dirac_enc_data_structure float **buffer_intensity_real[DIRAC_NUM_DIMS]; float *buffer_energy; + /* Frame count for detecting mono */ + int16_t mono_frame_count; } DIRAC_ENC_DATA, *DIRAC_ENC_HANDLE; diff --git a/lib_rend/ivas_splitRendererPre.c b/lib_rend/ivas_splitRendererPre.c index 0d7e1ed19e0c088da7c622978d40dc6db44e2e16..efbb188f8bc8beb3deec9298c88d01e0106feea6 100644 --- a/lib_rend/ivas_splitRendererPre.c +++ b/lib_rend/ivas_splitRendererPre.c @@ -59,8 +59,8 @@ #endif -#define MAX_BAND_SMOOTH ( 1 ) -#define SMOOTH_NORM_FACTOR ( 5.0f ) +#define MAX_BAND_SMOOTH ( 1 ) +#define SMOOTH_DIRAC_MONO_NORM_FACTOR ( 5.0f ) static void ivas_calc_mat_det_2by2_complex( float in_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float in_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS],