diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a3fa4217b7aefb21f1be5bf9789acd8038432e70..1efca3ccabc2a3ea686e33eac5aebdec737dd95f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -48,9 +48,9 @@ variables: FAILED_TESTCASES_LIST: "failed-testcases.txt" ERRORS_TESTCASES_LIST: "errors-testcases.txt" PYTEST_CACHE_ARTIFACT: "pytest_cache.zip" - FLOAT_REF_COMMIT_FILE: "float-ref-git-sha" - CUT_COMMIT_FILE: "CuT-git-sha" - MERGE_TARGET_COMMIT_FILE: "merge-target-git-sha" + FLOAT_REF_COMMIT_FILE: "float-ref-git-sha.txt" + CUT_COMMIT_FILE: "CuT-git-sha.txt" + MERGE_TARGET_COMMIT_FILE: "merge-target-git-sha.txt" MANUAL_PIPELINE_TYPE: description: "Type for the manual pipeline run. Use 'pytest-compare' to run comparison test against reference float codec." value: 'default' @@ -171,7 +171,7 @@ stages: - git pull origin $FLOAT_REF_BRANCH - *activate-debug-mode-info-if-set - make clean - - make -j + - make -j >> /dev/null - mv ./IVAS_cod ./$REF_ENCODER_PATH_FOR_BUILD_DO_NOT_MODIFY - mv ./IVAS_dec ./$REF_DECODER_PATH_FOR_BUILD_DO_NOT_MODIFY - mv ./IVAS_rend ./IVAS_rend_ref @@ -187,7 +187,7 @@ stages: - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - *activate-debug-mode-info-if-set - make clean - - make -j + - make -j >> /dev/null - mv ./IVAS_cod ./$MERGE_TARGET_ENCODER_PATH_FOR_BUILD_DO_NOT_MODIFY - mv ./IVAS_dec ./$MERGE_TARGET_DECODER_PATH_FOR_BUILD_DO_NOT_MODIFY - mv ./IVAS_rend ./IVAS_rend_merge_target @@ -202,7 +202,7 @@ stages: ### build dut binaries - *activate-debug-mode-info-if-set - make clean - - make -j + - make -j >> /dev/null .build-and-create-float-ref-outputs: &build-and-create-float-ref-outputs - *build-float-ref-and-dut-binaries @@ -223,7 +223,7 @@ stages: - enc_dmx_arg="--compare_enc_dmx" - fi - - python3 -m pytest $TEST_SUITE -v --update_ref 1 $enc_stats_arg $enc_dmx_arg --create_ref -n auto --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH || exit_code=$? + - python3 -m pytest "$TEST_SUITE" -v --update_ref 1 $enc_stats_arg $enc_dmx_arg --create_ref -n auto --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH || exit_code=$? .update-scripts-repo: &update-scripts-repo - cd $SCRIPTS_DIR @@ -509,15 +509,18 @@ stages: - *build-float-ref-binaries - *build-merge-target-binaries - make clean - - make -j + - make -j >> /dev/null - *check-up-to-date-in-comparison-jobs + # hack for quicker testing + - TEST_SUITE="tests/codec_be_on_mr_nonselection/test_param_file.py::test_param_file_tests[stv-stereo at 48 kbps, 32kHz in, 32kHz out, random FER at 5%]" + - exit_code_target=0 - - python3 -m pytest $TEST_SUITE -v --update_ref 1 --create_ref -n auto --ref_encoder_path $MERGE_TARGET_ENCODER_PATH --ref_decoder_path $MERGE_TARGET_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH > $PYTEST_LOG_TARGET_BRANCH || exit_code_target=$? + - python3 -m pytest "$TEST_SUITE" -v --update_ref 1 --create_ref -n auto --ref_encoder_path $MERGE_TARGET_ENCODER_PATH --ref_decoder_path $MERGE_TARGET_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH > $PYTEST_LOG_TARGET_BRANCH || exit_code_target=$? - exit_code=0 - rm -rf .pytest_cache || true - - python3 -m pytest --tb=no -q $TEST_SUITE -v --keep_files --create_cut --html=$HTML_REPORT --self-contained-html --junit-xml=$XML_REPORT --ref_encoder_path $MERGE_TARGET_ENCODER_PATH --ref_decoder_path $MERGE_TARGET_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout > pytest_log.txt || exit_code=$? + - python3 -m pytest --tb=no -q "$TEST_SUITE" -v --keep_files --create_cut --html=$HTML_REPORT --self-contained-html --junit-xml=$XML_REPORT --ref_encoder_path $MERGE_TARGET_ENCODER_PATH --ref_decoder_path $MERGE_TARGET_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout > pytest_log.txt || exit_code=$? - if [ $exit_code -ne 0 ]; then - exit_code=$EXIT_CODE_NON_BE @@ -639,13 +642,16 @@ stages: - INV_LEVEL_SCALING=$(awk "BEGIN {print 1.0 / $LEVEL_SCALING}") - comp_args="--mld --ssnr --odg --scalefac $INV_LEVEL_SCALING" + # hack for quicker testing + - TEST_SUITE="tests/codec_be_on_mr_nonselection/test_param_file.py::test_param_file_tests[stv-stereo at 48 kbps, 32kHz in, 32kHz out, random FER at 5%]" + ### run branch first # this per default builds the branch and the reference and creates the reference outputs - *build-and-create-float-ref-outputs - *check-up-to-date-in-comparison-jobs # need to restore cache again - *overwrite-pytest-cache-with-artifact - - python3 -m pytest --tb=no -q $TEST_SUITE -v --keep_files --create_cut --html=$HTML_REPORT_BRANCH --self-contained-html --junit-xml=$XML_REPORT_BRANCH $comp_args --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true + - python3 -m pytest --tb=no -q "$TEST_SUITE" -v --keep_files --create_cut --html=$HTML_REPORT_BRANCH --self-contained-html --junit-xml=$XML_REPORT_BRANCH $comp_args --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true - zero_errors_branch=$(cat $XML_REPORT_BRANCH | grep -c 'errors="0"') || true - python3 scripts/parse_xml_report.py $XML_REPORT_BRANCH $CSV_BRANCH @@ -660,10 +666,10 @@ stages: - git checkout $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - git pull origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME - make clean - - make -j + - make -j >> /dev/null # need to restore cache again - *overwrite-pytest-cache-with-artifact - - python3 -m pytest --tb=no -q $TEST_SUITE -v --keep_files --create_cut --html=$HTML_REPORT_MAIN --self-contained-html --junit-xml=$XML_REPORT_MAIN $comp_args --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true + - python3 -m pytest --tb=no -q "$TEST_SUITE" -v --keep_files --create_cut --html=$HTML_REPORT_MAIN --self-contained-html --junit-xml=$XML_REPORT_MAIN $comp_args --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true - python3 scripts/parse_xml_report.py $XML_REPORT_MAIN $CSV_MAIN ### compare the two csv files for regressions @@ -759,7 +765,7 @@ stages: - make_args="$make_args IGNORELIST=1" - fi - make clean - - make -j $make_args + - make -j $make_args >> /dev/null - testcase_timeout_arg="--testcase_timeout $TESTCASE_TIMEOUT_LTV_SANITIZERS" # disable per-testcase timeout for msan to evaluate what is going on that it takes so long - if [[ $CLANG_NUM = 1 ]]; then @@ -899,7 +905,7 @@ branch-is-up-to-date-with-target-post: exit 1 fi -clang-format-check: +.clang-format-check: extends: - .test-job-linux rules: @@ -972,7 +978,7 @@ build-codec-linux-make: - make -j # ensure that codec builds on linux with instrumentation active -build-codec-linux-instrumented-make: +.build-codec-linux-instrumented-make: rules: - if: $CI_PIPELINE_SOURCE == 'web' - if: $CI_PIPELINE_SOURCE == 'push' && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH @@ -1030,7 +1036,7 @@ build-codec-windows-msbuild: # --------------------------------------------------------------- ### jobs that check for bitexactness of fx encoder and decoder -check-be-to-target-short-enc-0db: +.check-be-to-target-short-enc-0db: extends: - .rules-pytest-to-main-short - .test-job-linux @@ -1043,7 +1049,7 @@ check-be-to-target-short-enc-0db: - rm -rf tests/dut tests/ref <<: *check-be-to-target-anchor -check-be-to-target-short-enc-+10db: +.check-be-to-target-short-enc-+10db: extends: - .rules-pytest-to-main-short - .test-job-linux @@ -1056,7 +1062,7 @@ check-be-to-target-short-enc-+10db: - rm -rf tests/dut tests/ref <<: *check-be-to-target-anchor -check-be-to-target-short-enc--10db: +.check-be-to-target-short-enc--10db: extends: - .rules-pytest-to-main-short - .test-job-linux @@ -1069,7 +1075,7 @@ check-be-to-target-short-enc--10db: - rm -rf tests/dut tests/ref <<: *check-be-to-target-anchor -check-be-to-target-short-dec-0db: +.check-be-to-target-short-dec-0db: extends: - .rules-pytest-to-main-short - .test-job-linux @@ -1082,7 +1088,7 @@ check-be-to-target-short-dec-0db: - rm -rf tests/dut tests/ref <<: *check-be-to-target-anchor -check-be-to-target-short-dec-+10db: +.check-be-to-target-short-dec-+10db: extends: - .rules-pytest-to-main-short - .test-job-linux @@ -1109,7 +1115,7 @@ check-be-to-target-short-dec--10db: <<: *check-be-to-target-anchor ### jobs that check for regressions on non-BE testcases -check-regressions-short-enc-0db: +.check-regressions-short-enc-0db: stage: test needs: - job: "check-be-to-target-short-enc-0db" @@ -1125,7 +1131,7 @@ check-regressions-short-enc-0db: - rm -rf tests/dut tests/ref <<: *check-regressions-pytest-anchor -check-regressions-short-enc-+10db: +.check-regressions-short-enc-+10db: stage: test needs: - job: "check-be-to-target-short-enc-+10db" @@ -1141,7 +1147,7 @@ check-regressions-short-enc-+10db: - rm -rf tests/dut tests/ref <<: *check-regressions-pytest-anchor -check-regressions-short-enc--10db: +.check-regressions-short-enc--10db: stage: test needs: - job: "check-be-to-target-short-enc--10db" @@ -1157,7 +1163,7 @@ check-regressions-short-enc--10db: - rm -rf tests/dut tests/ref <<: *check-regressions-pytest-anchor -check-regressions-short-dec-0db: +.check-regressions-short-dec-0db: stage: test needs: - job: "check-be-to-target-short-dec-0db" @@ -1173,7 +1179,7 @@ check-regressions-short-dec-0db: - rm -rf tests/dut tests/ref <<: *check-regressions-pytest-anchor -check-regressions-short-dec-+10db: +.check-regressions-short-dec-+10db: stage: test needs: - job: "check-be-to-target-short-dec-+10db" @@ -1536,7 +1542,7 @@ coverage-test-on-main-scheduled: - *copy-ltv-files-to-testv-dir - *build-float-ref-binaries # Build DuT binaries with GCOV - - make clean + - make clean >> /dev/null - make GCOV=1 -j - cp IVAS_rend IVAS_rend_ref # Copy to ensure instrumented renderer is run in the first pytest call @@ -1580,7 +1586,7 @@ coverage-test-on-main-scheduled: # --------------------------------------------------------------- # check bitexactness to EVS -be-2-evs-26444: +.be-2-evs-26444: extends: - .test-job-linux rules: @@ -1594,7 +1600,7 @@ be-2-evs-26444: - *print-common-info - *update-scripts-repo - sed -i".bak" "s/\(#define EVS_FLOAT\)/\/\/\1/" lib_com/options.h - - make -j + - make -j >> /dev/null # copy over to never change the testvector dir - cp -r $EVS_BE_TEST_DIR_BASOP ./evs_be_test @@ -1658,7 +1664,7 @@ voip-be-on-merge-request: script: - *print-common-info - make clean - - make -j + - make -j >> /dev/null - python3 -m pytest tests/test_be_for_jbm_neutral_dly_profile.py diff --git a/lib_com/options.h b/lib_com/options.h index e8a44d873d9aaa3d8cc9b9db02792a309469fbec..8ee48411175b75ba540fd16051cb1d231ff67f0e 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -82,6 +82,8 @@ #define HARM_PUSH_BIT #define HARM_ENC_INIT //#define HARM_SCE_INIT -#define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ +#define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ +#define FIX_1348_BIT_PRECISION_IMPROVEMENT +//#define FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 4257b7e79fb16242288d0686fadb189565c7e78c..d1d885b3007e5b015735a4f683f2aa511150d487 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -9848,6 +9848,9 @@ void IMDCT_ivas_fx( const Word16 frame_cnt, const Word16 bfi, Word16 *old_out_fx, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 *q_old_out_fx, +#endif const Word16 FB_flag, Decoder_State *st, const Word16 fullbandScale, diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index af80ee1f54e8672c19b05e6c29432d91639ccd3f..f97455c65ec67557ad2214b53d9794bca5192e31 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2601,6 +2601,9 @@ void IMDCT_ivas_fx( const Word16 frame_cnt, const Word16 bfi, Word16 *old_out_fx, // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 *q_old_out_fx, +#endif const Word16 FB_flag, Decoder_State *st, const Word16 fullbandScale, @@ -2862,9 +2865,16 @@ void IMDCT_ivas_fx( move16(); Word16 diff = sub( q_tmp_fx_32, q_win ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 q_old_out_diff = sub( q_tmp_fx_32, *q_old_out_fx ); +#endif FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + old_out_fx_32[ind] = L_shl( old_out_fx[ind], q_old_out_diff ); +#else old_out_fx_32[ind] = L_shl( old_out_fx[ind], diff ); +#endif move32(); } @@ -2872,7 +2882,11 @@ void IMDCT_ivas_fx( FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + old_out_fx[ind] = extract_l( L_shr( old_out_fx_32[ind], q_old_out_diff ) ); +#else old_out_fx[ind] = extract_l( L_shr( old_out_fx_32[ind], diff ) ); +#endif xn_buf_fx[ind] = extract_l( L_shr( xn_buf_fx_32[ind], diff ) ); move16(); move16(); @@ -2880,7 +2894,9 @@ void IMDCT_ivas_fx( } ELSE { +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD Word16 q_old_out = q_win; +#endif move16(); edct_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); Word16 res_m, res_e; @@ -2904,15 +2920,21 @@ void IMDCT_ivas_fx( move16(); } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + window_ola_fx( tmp_fx_32, xn_buf_fx, &q_tmp_fx_32, old_out_fx, q_old_out_fx, L_frame, hTcxCfg->tcx_last_overlap_mode, hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); +#else window_ola_fx( tmp_fx_32, xn_buf_fx, &q_tmp_fx_32, old_out_fx, &q_old_out, L_frame, hTcxCfg->tcx_last_overlap_mode, hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); q_diff = sub( q_old_out, q_win ); +#endif Word16 diff = sub( q_tmp_fx_32, q_win ); FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD old_out_fx[ind] = shr_sat( old_out_fx[ind], q_diff ); move16(); +#endif xn_buf_fx[ind] = shr_sat( xn_buf_fx[ind], diff ); move16(); } @@ -3004,9 +3026,18 @@ void IMDCT_ivas_fx( IF( hTcxCfg->last_aldo != 0 ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 tmp_old_out; + Word16 q_diff = sub( *q_old_out_fx, q_win ); +#endif FOR( i = 0; i < sub( overlap, tcx_mdct_window_min_length ); i++ ) { - xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], old_out_fx[( i + nz )] ); // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], tmp_old_out ); // Q(-2) +#else + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], old_out_fx[( i + nz )] ); // Q(-2) +#endif move16(); } @@ -3019,17 +3050,32 @@ void IMDCT_ivas_fx( // tested FOR( ; i < overlap; i++ ) /* perfectly reconstructing ALDO shortening */ { - xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], old_out_fx[( i + nz )] ); // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], tmp_old_out ); // Q(-2) +#else + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], old_out_fx[( i + nz )] ); // Q(-2) +#endif move16(); } FOR( i = 0; i < ( tcx_mdct_window_min_length / 2 ); i++ ) { - xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[i].v.re ) ); // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( ( i + nz ) + overlap )], q_diff ); + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[i].v.re ) ); // Q(-2) +#else + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[i].v.re ) ); // Q(-2) +#endif move16(); } FOR( ; i < tcx_mdct_window_min_length; i++ ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( ( i + nz ) + overlap )], q_diff ); + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( tcx_mdct_window_min_length - ( 1 + i ) )].v.im ) ); // Q(-2) +#else xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[( tcx_mdct_window_min_length - ( 1 + i ) )].v.im ) ); // Q(-2) +#endif move16(); } } @@ -3037,12 +3083,22 @@ void IMDCT_ivas_fx( { FOR( ; i < ( overlap - ( tcx_mdct_window_min_length / 2 ) ); i++ ) { - xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#else + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#endif move16(); } FOR( ; i < overlap; i++ ) { - xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#else + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#endif move16(); } } @@ -3091,7 +3147,9 @@ void IMDCT_ivas_fx( /* Compute windowed synthesis in case of switching to ALDO windows in next frame */ Copy( xn_buf_fx + sub( L_frame, nz ), old_out_fx, add( nz, overlap ) ); set16_fx( old_out_fx + add( nz, overlap ), 0, nz ); - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + *q_old_out_fx = q_win; +#endif tcx_windowing_synthesis_past_frame( old_out_fx + nz, tcx_aldo_window_1_trunc_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, overlap, tcx_mdct_window_half_length, tcx_mdct_window_min_length, hTcxCfg->tcx_curr_overlap_mode ); /* If current overlap mode = FULL_OVERLAP -> ALDO_WINDOW */ @@ -5196,7 +5254,11 @@ void decoder_tcx_imdct_fx( IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, hTcxDec->syn_Overl_TDAC, xn_buf_fx, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_trans, hTcxCfg->tcx_mdct_window_half_length, hTcxCfg->tcx_mdct_window_min_length, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, &st->hHQ_core->q_old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#endif } /* Generate additional comfort noise to mask potential coding artefacts */ @@ -5218,7 +5280,11 @@ void decoder_tcx_imdct_fx( Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, hTcxDec->syn_Overl_TDAC, xn_buf_fx, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_trans, hTcxCfg->tcx_mdct_window_half_length, hTcxCfg->tcx_mdct_window_min_length, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, &st->hHQ_core->q_old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#endif } FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) @@ -5235,13 +5301,21 @@ void decoder_tcx_imdct_fx( { IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, &st->hHQ_core->q_old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#endif } ELSE { IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, &st->hHQ_core->q_old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#endif } FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { diff --git a/lib_dec/hq_core_dec_fx.c b/lib_dec/hq_core_dec_fx.c index 4d1ef677471054485e43ff9c2a27c3b2058f86d3..7d2a3c7a581ddd66d3b915f5f0d3f99f6b213c03 100644 --- a/lib_dec/hq_core_dec_fx.c +++ b/lib_dec/hq_core_dec_fx.c @@ -1202,6 +1202,10 @@ void HQ_core_dec_init_fx( hHQ_core->Q_old_wtda = 15; hHQ_core->Q_old_postdec = 0; hHQ_core->Q_old_wtda_LB = 0; +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + hHQ_core->q_old_out_fx = 0; + hHQ_core->q_old_out_LB_fx = 0; +#endif move16(); move16(); move16(); diff --git a/lib_dec/ivas_mdct_core_dec_fx.c b/lib_dec/ivas_mdct_core_dec_fx.c index fabebede4e6e468526331188d065f9086d1715c9..77673c3bb9cf228a0a0bf795acfbc2fb0bc3e986 100644 --- a/lib_dec/ivas_mdct_core_dec_fx.c +++ b/lib_dec/ivas_mdct_core_dec_fx.c @@ -1098,7 +1098,11 @@ void ivas_mdct_core_reconstruct_fx( Word16 *synthFB_fx; Word16 q_syn = 0; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Word16 q_win = -1; +#else Word16 q_win = -2; +#endif move16(); /* TCX */ Word16 xn_buf_fx[L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX]; // Q(-2) @@ -1172,10 +1176,12 @@ void ivas_mdct_core_reconstruct_fx( Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_syn_Overl_TDAC ) ); // st->hTcxDec->Q_syn_Overl_TDAC -> q_win Scale_sig( st->hTcxDec->old_syn_Overl, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_old_syn_Overl ) ); // Q(-1 - st->Q_syn) -> q_win st->hTcxDec->Q_old_syn_Overl = q_win; - Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win - Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win - Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( q_win, st->hHQ_core->Q_old_wtda_LB ) ); // Q(st->hHQ_core->Q_old_wtda_LB) -> q_win - Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( q_win, st->hHQ_core->Q_old_wtda ) ); // Q(st->hHQ_core->Q_old_wtda) -> q_win + Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win + Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( q_win, st->hHQ_core->Q_old_wtda_LB ) ); // Q(st->hHQ_core->Q_old_wtda_LB) -> q_win + Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( q_win, st->hHQ_core->Q_old_wtda ) ); // Q(st->hHQ_core->Q_old_wtda) -> q_win +#endif Scale_sig( synth_buf_fx, add( add( st->hTcxDec->old_synth_len, L_FRAME_PLUS ), M ), sub( q_win, q_syn ) ); // q_syn -> q_win Scale_sig( synth_bufFB_fx, add( add( st->hTcxDec->old_synth_lenFB, L_FRAME_PLUS ), M ), sub( q_win, q_syn ) ); // q_syn -> q_win Scale_sig( st->syn, M + 1, sub( q_win, st->Q_syn ) ); // st->Q_syn -> q_win @@ -1206,6 +1212,10 @@ void ivas_mdct_core_reconstruct_fx( /* Note: these buffers are not subframe-based, hence no indexing with k */ set16_fx( &st->hHQ_core->old_out_LB_fx[0], 0, L_frame[ch] ); set16_fx( &st->hHQ_core->old_out_fx[0], 0, L_frameTCX[ch] ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + st->hHQ_core->q_old_out_LB_fx = 0; + st->hHQ_core->q_old_out_fx = 0; +#endif set16_fx( &st->hTcxDec->syn_Overl[0], 0, shr( L_frame[ch], 1 ) ); set16_fx( &st->hTcxDec->syn_OverlFB[0], 0, shr( L_frameTCX[ch], 1 ) ); set16_fx( &st->hTcxDec->syn_Overl_TDAC[0], 0, shr( L_frame[ch], 1 ) ); @@ -1246,12 +1256,17 @@ void ivas_mdct_core_reconstruct_fx( Scale_sig( st->syn, M + 1, add( st->Q_syn, 2 ) ); Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn - Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn - Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn + Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn +#endif st->hHQ_core->Q_old_wtda = st->Q_syn; move16(); st->hHQ_core->Q_old_wtda_LB = st->Q_syn; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Scale_sig( st->mem_syn2_fx, M, -1 ); // -1 should be calculated from q_win. +#endif } ELSE /*ACELP core for ACELP-PLC */ { diff --git a/lib_dec/stat_dec.h b/lib_dec/stat_dec.h index fcd5142a4c11b824f0a9638e8ca0d1209ae58b37..84f8d83bd7351e52d1e292390021d7d990d5142c 100644 --- a/lib_dec/stat_dec.h +++ b/lib_dec/stat_dec.h @@ -702,6 +702,10 @@ typedef struct hq_dec_structure Word16 exp_old_out; Word16 old_out_LB_fx[L_FRAME32k]; /* HQ core - previous synthesis for OLA for Low Band */ Word32 old_out_LB_fx32[L_FRAME32k]; +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 q_old_out_LB_fx; + Word16 q_old_out_fx; +#endif Word16 q_old_outLB_fx; Word16 Q_old_wtda_LB; Word16 Q_old_wtda;