diff --git a/lib_com/options.h b/lib_com/options.h old mode 100644 new mode 100755 index d7d33c9ebc236d1677493e1a2728b78b905344b0..b462e5ec50e92c88facfc16130298d349d5c817b --- a/lib_com/options.h +++ b/lib_com/options.h @@ -161,8 +161,7 @@ #define FIX_VBR_COMPLEXITY /* Issue 234: fix extremely high complexity numbers for IVAS EVS mode */ #define FIX_ISM_INACTIVE_BITS /* Issue 230: fix bitbudget distribution in inactive frames in ISM format */ #define IMPROVE_CMDLINE_ROBUSTNESS /* Issue 233: Improve robustness of command-line parameters */ - -#define FIX_ITD_CNG /* Eri: Fix for CNG ITD */ +#define FIX_MDCT_AND_MC_MONO_ISSUES /* Issue 242: Fix some issues with TCX-LTP and delay alignment for mono output */ /* ################## End DEVELOPMENT switches ######################### */ diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c index 12b8fcb8996e2e1fd739bc3b4bd850d39c0ccbff..41b8c339a26ff533d2c941908d6b009017744497 100644 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -444,10 +444,12 @@ ivas_error ivas_cpe_dec( } } } +#ifndef FIX_MDCT_AND_MC_MONO_ISSUES else if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->nchan_out == 1 && ( is_DTXrate( ivas_total_brate ) == 0 || ( is_DTXrate( ivas_total_brate ) == 1 && is_DTXrate( st_ivas->hDecoderConfig->last_ivas_total_brate ) == 0 ) ) ) { applyDmxMdctStereo( hCPE, output, output_frame ); } +#endif /*----------------------------------------------------------------* * Update parameters for stereo CNA @@ -461,6 +463,13 @@ ivas_error ivas_cpe_dec( synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->nchan_out == 1 && ( is_DTXrate( ivas_total_brate ) == 0 || ( is_DTXrate( ivas_total_brate ) == 1 && is_DTXrate( st_ivas->hDecoderConfig->last_ivas_total_brate ) == 0 ) ) ) + { + applyDmxMdctStereo( hCPE, output, output_frame ); + } +#endif + #ifndef DEBUG_STEREO_DFT_OUTRESPRED 
/*----------------------------------------------------------------* * IC-BWE: output LB and HB mix in ACELP mode @@ -581,7 +590,11 @@ ivas_error create_cpe_dec( hCPE->lt_es_em = 0.0f; /* Note: nchan_out is considered to be related to the structure. This is nchan_out for CPE and for MASA_format is always 2. */ +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + if ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT || st_ivas->ivas_format == MC_FORMAT ) +#else if ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT || ( st_ivas->ivas_format == MC_FORMAT && st_ivas->mc_mode == MC_MODE_MCMASA ) ) +#endif { hCPE->nchan_out = CPE_CHANNELS; } diff --git a/lib_dec/ivas_stereo_mdct_stereo_dec.c b/lib_dec/ivas_stereo_mdct_stereo_dec.c old mode 100644 new mode 100755 index 67b893a9f21729bc2f88a3c6a2015aac04dac89b..480466ec18b53ade4862704638fb11026bc5cb93 --- a/lib_dec/ivas_stereo_mdct_stereo_dec.c +++ b/lib_dec/ivas_stereo_mdct_stereo_dec.c @@ -655,7 +655,11 @@ void applyDmxMdctStereo( fade = 0.f; dmx_len = crossfade_len; } +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + else if ( hCPE->last_element_mode == IVAS_CPE_DFT && hCPE->last_element_brate <= IVAS_32k ) +#else else if ( hCPE->last_element_mode == IVAS_CPE_DFT && hCPE->last_element_brate <= IVAS_24k4 ) +#endif { crossfade_len = NS2SA( hCPE->hCoreCoder[0]->output_Fs, DELAY_CLDFB_NS ); step /= crossfade_len; diff --git a/lib_dec/ivas_stereo_switching_dec.c b/lib_dec/ivas_stereo_switching_dec.c old mode 100644 new mode 100755 index 56c1cfef5428aab7cd7b09857e367bd7a3f88e4a..8db6ae3ac3e904ea05b58eb09158cb521e07efda --- a/lib_dec/ivas_stereo_switching_dec.c +++ b/lib_dec/ivas_stereo_switching_dec.c @@ -716,7 +716,18 @@ ivas_error stereo_memory_dec( if ( hCPE->last_element_mode == IVAS_CPE_DFT ) { +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + if ( hCPE->nchan_out == 1 ) + { + cpy_tcx_ltp_data( hCPE->hCoreCoder[0]->hTcxLtpDec, hCPE->hCoreCoder[1]->hTcxLtpDec, output_Fs ); + } + else + { + cpy_tcx_ltp_data( 
&tcxLtpTmp, hCPE->hCoreCoder[1]->hTcxLtpDec, output_Fs ); + } +#else cpy_tcx_ltp_data( &tcxLtpTmp, hCPE->hCoreCoder[1]->hTcxLtpDec, output_Fs ); +#endif } if ( hCPE->last_element_mode == IVAS_CPE_TD ) @@ -975,6 +986,9 @@ void synchro_synthesis( int16_t dft_mono_brate_switch; int16_t delay_diff; float tmpF; +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + int16_t nChannels; +#endif sts = hCPE->hCoreCoder; output_Fs = sts[0]->output_Fs; @@ -1044,6 +1058,14 @@ void synchro_synthesis( } } +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + if ( hCPE->nchan_out == 1 && hCPE->last_element_mode == IVAS_CPE_MDCT ) + { + v_add( sts[0]->prev_synth_buffer, sts[1]->prev_synth_buffer, sts[0]->prev_synth_buffer, delay_comp_DFT ); + v_multc( sts[0]->prev_synth_buffer, INV_SQRT_2, sts[0]->prev_synth_buffer, delay_comp_DFT ); + } +#endif + if ( use_cldfb_for_last_dft ) { /* delay CLDFB-based mono output (<= 24.4 kbps) to be aligned with DFT-based mono output (32 kbps), needed to avoid discontinuities with TCX-LTP. */ @@ -1139,6 +1161,16 @@ void synchro_synthesis( } } +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + /* if previous frame had only one channel copy buffers to other channel */ + if ( hCPE->nchan_out == 1 && hCPE->element_mode == IVAS_CPE_MDCT && hCPE->last_element_mode == IVAS_CPE_DFT ) + { + mvr2r( sts[0]->prev_synth_buffer, sts[1]->prev_synth_buffer, delay_comp_TD ); + mvr2r( tmp_out[0], tmp_out[1], delay_cldfb ); + mvr2r( p_output_mem[0], p_output_mem[1], delay_diff ); + } +#endif + /*----------------------------------------------------------------* * update DFT synthesis overlap memory @output_Fs; needed for TD->DFT stereo switching *----------------------------------------------------------------*/ @@ -1222,7 +1254,12 @@ void synchro_synthesis( } } +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + nChannels = ( hCPE->element_mode == IVAS_CPE_MDCT ) ? 
2 : hCPE->nchan_out; + for ( n = 0; n < nChannels; n++ ) +#else for ( n = 0; n < hCPE->nchan_out; n++ ) +#endif { if ( hCPE->element_mode == IVAS_CPE_MDCT ) { @@ -1255,7 +1292,11 @@ void synchro_synthesis( } /* cross-fading between DFT OLA memory and TD output */ +#ifdef FIX_MDCT_AND_MC_MONO_ISSUES + for ( n = 0; n < nChannels; n++ ) +#else for ( n = 0; n < hCPE->nchan_out; n++ ) +#endif { if ( hCPE->element_mode == IVAS_CPE_MDCT ) { diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index 3764f1139cb46b288c5c2a7b7b70b1457d13abfc..179b8214b58a531a40a312459f3de508b9dcac11 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -1220,6 +1220,12 @@ static ivas_error initMcPanGainsWithMonoOut( inputMc->panGains[i][0] = 1.f; } } + else if ( inputMc->base.inConfig == IVAS_REND_AUDIO_CONFIG_STEREO ) + { + /* Special case for STEREO to MONO: Passive downmix (L+R)/2 */ + inputMc->panGains[0][0] = 0.5; + inputMc->panGains[1][0] = 0.5; + } else { /* ls_conversion_cicpX_stereo contains gains for side speakers. 
diff --git a/scripts/config/self_test.prm b/scripts/config/self_test.prm index 4923bbb2f840102ebfcde9d0c3fb889a850f5ef5..50d8447a06875fe3b52af4e36dc6d18431e38e2d 100644 --- a/scripts/config/self_test.prm +++ b/scripts/config/self_test.prm @@ -794,6 +794,10 @@ ../IVAS_cod -mc 5_1 192000 48 testv/stv51MC48c.pcm bit ../IVAS_dec BINAURAL_ROOM 48 bit testv/stv51MC48c.pcm_MC51_192000_48-48_BinauralRoom.tst +// Multi-channel 5_1 at 256 kbps, 48kHz in, 48kHz out, MONO out +../IVAS_cod -mc 5_1 256000 48 testv/stv51MC48c.pcm bit +../IVAS_dec MONO 48 bit testv/stv51MC48c.pcm_MC51_256000_48-48_mono.tst + // Multi-channel 5_1 at 256 kbps, 48kHz in, 48kHz out, BINAURAL ROOM out, head rotation ../IVAS_cod -mc 5_1 256000 48 testv/stv51MC48c.pcm bit ../IVAS_dec -t testv/headrot_case00_3000_q.csv BINAURAL_ROOM 48 bit testv/stv51MC48c.pcm_MC51_256000_48-48_BinauralRoom_Headrot.tst diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py index 5163c7c524f3d37a3eeeac6f0487ec7758cf8621..04c77d0ac4c21c1ed9d61e0cf712e64e30fceb00 100644 --- a/scripts/pyaudio3dtools/spatialaudioconvert.py +++ b/scripts/pyaudio3dtools/spatialaudioconvert.py @@ -337,7 +337,10 @@ def convert_mc( ) -> np.ndarray: """Convert a multichannel signal to the requested output format""" # MC -> LS - if out_spfmt.isloudspeaker: + if in_spfmt.name == "STEREO" and out_spfmt.name == "MONO": + MC2LS = np.vstack([[0.5], [0.5]]) + return in_sig @ MC2LS + elif out_spfmt.isloudspeaker: try: MC2LS = IVAS_MC_CONVERSION[in_spfmt.name][out_spfmt.name] except KeyError: diff --git a/tests/renderer/constants.py b/tests/renderer/constants.py index adb600768f3c10f2ebfc8b5fa3541084e23dcdab..4c890cd026d22b77698f11242edbbb68d138ae00 100644 --- a/tests/renderer/constants.py +++ b/tests/renderer/constants.py @@ -541,37 +541,36 @@ pass_snr = { "test_multichannel_binaural_static_vs_decoder[7_1-BINAURAL]": 74, "test_multichannel_binaural_static_vs_decoder[7_1-BINAURAL_ROOM]": 19, 
"test_multichannel_binaural_static_vs_decoder[7_1_4-BINAURAL_ROOM]": 18, - # Failure reason: R channel in MONO output is delayed - "test_multichannel_vs_decoder[5_1_2-MONO]": 1, - "test_multichannel_vs_decoder[5_1_4-MONO]": 1, - "test_multichannel_vs_decoder[5_1-MONO]": 1, - "test_multichannel_vs_decoder[7_1_4-MONO]": 1, - "test_multichannel_vs_decoder[7_1-MONO]": 1, - "test_multichannel_vs_decoder[STEREO-MONO]": 17, # Failure reason: Active dmx (decoder) vs Passive dmx (renderer) - "test_multichannel_vs_decoder[5_1_2-STEREO]": 44, - "test_multichannel_vs_decoder[5_1_4-STEREO]": 48, + "test_multichannel_vs_decoder[5_1-5_1_2]": 62, + "test_multichannel_vs_decoder[5_1-5_1_4]": 62, + "test_multichannel_vs_decoder[5_1-7_1]": 62, + "test_multichannel_vs_decoder[5_1-7_1_4]": 62, + "test_multichannel_vs_decoder[5_1-MONO]": 43, "test_multichannel_vs_decoder[5_1-STEREO]": 48, - "test_multichannel_vs_decoder[7_1_4-STEREO]": 46, - "test_multichannel_vs_decoder[7_1-STEREO]": 44, - "test_multichannel_vs_decoder[5_1_2-5_1_4]": 63, "test_multichannel_vs_decoder[5_1_2-5_1]": 63, - "test_multichannel_vs_decoder[5_1_2-7_1_4]": 63, + "test_multichannel_vs_decoder[5_1_2-5_1_4]": 63, "test_multichannel_vs_decoder[5_1_2-7_1]": 63, - "test_multichannel_vs_decoder[5_1_4-5_1_2]": 63, + "test_multichannel_vs_decoder[5_1_2-7_1_4]": 63, + "test_multichannel_vs_decoder[5_1_2-MONO]": 38, + "test_multichannel_vs_decoder[5_1_2-STEREO]": 44, "test_multichannel_vs_decoder[5_1_4-5_1]": 62, - "test_multichannel_vs_decoder[5_1_4-7_1_4]": 61, + "test_multichannel_vs_decoder[5_1_4-5_1_2]": 63, "test_multichannel_vs_decoder[5_1_4-7_1]": 62, - "test_multichannel_vs_decoder[5_1-5_1_2]": 62, - "test_multichannel_vs_decoder[5_1-5_1_4]": 62, - "test_multichannel_vs_decoder[5_1-7_1_4]": 62, - "test_multichannel_vs_decoder[5_1-7_1]": 62, - "test_multichannel_vs_decoder[7_1_4-5_1_2]": 63, - "test_multichannel_vs_decoder[7_1_4-5_1_4]": 63, - "test_multichannel_vs_decoder[7_1_4-5_1]": 62, - 
"test_multichannel_vs_decoder[7_1_4-7_1]": 62, + "test_multichannel_vs_decoder[5_1_4-7_1_4]": 61, + "test_multichannel_vs_decoder[5_1_4-MONO]": 42, + "test_multichannel_vs_decoder[5_1_4-STEREO]": 48, + "test_multichannel_vs_decoder[7_1-5_1]": 63, "test_multichannel_vs_decoder[7_1-5_1_2]": 63, "test_multichannel_vs_decoder[7_1-5_1_4]": 63, - "test_multichannel_vs_decoder[7_1-5_1]": 63, "test_multichannel_vs_decoder[7_1-7_1_4]": 63, + "test_multichannel_vs_decoder[7_1-MONO]": 38, + "test_multichannel_vs_decoder[7_1-STEREO]": 44, + "test_multichannel_vs_decoder[7_1_4-5_1]": 62, + "test_multichannel_vs_decoder[7_1_4-5_1_2]": 63, + "test_multichannel_vs_decoder[7_1_4-5_1_4]": 63, + "test_multichannel_vs_decoder[7_1_4-7_1]": 62, + "test_multichannel_vs_decoder[7_1_4-MONO]": 41, + "test_multichannel_vs_decoder[7_1_4-STEREO]": 46, + "test_multichannel_vs_decoder[STEREO-MONO]": 17, }