From 202f6713040a5daaf900ee708e157b489ece0f13 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 12 Feb 2024 14:37:31 +0530 Subject: [PATCH] generate_stereo_masking_noise() and sub-funcs converted to fixed point. Below functions are converted to fixed point: generate_stereo_masking_noise(), generate_masking_noise(), SynthesisSTFT_flt() along with a few helper functions. --- lib_com/cnst.h | 1 + lib_com/fd_cng_com.c | 132 ++++++++- lib_com/ivas_cnst.h | 2 + lib_com/preemph.c | 29 ++ lib_com/prot.h | 57 +++- lib_com/residu.c | 30 ++ lib_com/rom_com.c | 62 ++++- lib_com/rom_com.h | 3 + lib_com/stat_com.h | 18 ++ lib_dec/acelp_core_dec.c | 16 ++ lib_dec/amr_wb_dec.c | 4 + lib_dec/fd_cng_dec.c | 414 +++++++++++++++++++++++++++- lib_dec/ivas_stat_dec.h | 2 + lib_dec/ivas_stereo_cng_dec.c | 4 + lib_dec/ivas_stereo_mdct_core_dec.c | 5 +- lib_dec/ivas_tcx_core_dec.c | 9 +- 16 files changed, 780 insertions(+), 8 deletions(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index e2073f223..f3b763476 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -1666,6 +1666,7 @@ enum #define FD_CNG_JOINT_stages_25bits 4 #define OUTMAX_INV 0.000030517578125f /* 1/2^15 */ +#define OUTMAX_INV_FX 65536 /* 1/2^15 (Q31) */ #define OUTMAX_SQ 1073741824.f /* 2^30 */ #define OUTMAX_SQ_INV 0.00000000093132257461547852f /* 1/2^30 */ diff --git a/lib_com/fd_cng_com.c b/lib_com/fd_cng_com.c index 61738d940..a48c9045c 100644 --- a/lib_com/fd_cng_com.c +++ b/lib_com/fd_cng_com.c @@ -39,10 +39,14 @@ #include "options.h" #include #include "prot.h" +#include "prot_fx2.h" #include "rom_com.h" #include "wmc_auto.h" - +#ifdef IVAS_FLOAT_FIXED +#define FFT_SCALING_512 1073741824 //Q22 +#define FFT_SCALING_640 1342177280 //Q22 +#endif /*------------------------------------------------------------------- * Local function prototypes *-------------------------------------------------------------------*/ @@ -103,6 +107,10 @@ void initFdCngCom_flt( set_f( hFdCngCom->sidNoiseEst_flt, 0.0f, NPART ); set_f( 
hFdCngCom->A_cng_flt, 0.0f, M + 1 ); hFdCngCom->A_cng_flt[0] = 1.f; +#ifdef IVAS_FLOAT_FIXED + set_s( hFdCngCom->A_cng, 0, M + 1 ); + hFdCngCom->A_cng[0] = MAX_16; +#endif /* Set some counters and flags */ hFdCngCom->inactive_frame_counter = 0; /* Either SID or zero frames */ @@ -110,6 +118,9 @@ void initFdCngCom_flt( hFdCngCom->frame_type_previous = ACTIVE_FRAME; hFdCngCom->flag_noisy_speech = 0; hFdCngCom->likelihood_noisy_speech_flt = 0.f; +#ifdef IVAS_FLOAT_FIXED + hFdCngCom->likelihood_noisy_speech_32fx = 0; +#endif hFdCngCom->numCoreBands = 0; hFdCngCom->stopBand = 0; hFdCngCom->startBand = 0; @@ -937,6 +948,115 @@ void SynthesisSTFT_flt( return; } +#ifdef IVAS_FLOAT_FIXED +/*------------------------------------------------------------------- + * SynthesisSTFT_fx() + * + * STFT synthesis filterbank + *-------------------------------------------------------------------*/ + +void SynthesisSTFT_fx( + Word32 *fftBuffer, /* i : FFT bins */ + Word16 Q_in, + Word32 *timeDomainOutput, + Word32 *olapBuffer, + const Word16 *olapWin, + const int16_t tcx_transition, + HANDLE_FD_CNG_COM hFdCngCom, /* i/o: FD_CNG structure containing all buffers and variables */ + const int16_t element_mode, /* i : element mode */ + const int16_t nchan_out /* i : number of output channels */ +) +{ + int16_t i; + Word32 buf_fx[M + 1 + 320], tmp_fx; + + /* Perform IFFT */ + RFFTN_fx( fftBuffer, hFdCngCom->fftSineTab_fx, hFdCngCom->fftlen, 1 ); + + /* Handle overlap in P/S domain for stereo */ + IF( ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_DFT ) && nchan_out == 2 ) + { + mvl2l( olapBuffer + 3 * hFdCngCom->frameSize / 4 - ( M + 1 ), buf_fx, hFdCngCom->frameSize + M + 1 ); + set_l( olapBuffer, 0, hFdCngCom->fftlen ); + } + ELSE + { + mvl2l( olapBuffer + hFdCngCom->frameSize, olapBuffer, hFdCngCom->frameSize ); + set_l( olapBuffer + hFdCngCom->frameSize, 0, hFdCngCom->frameSize ); /*olapBuffer, fftBuffer, olapWin*/ + } + + IF( tcx_transition ) + { + FOR( i = 0; i < 5 * 
hFdCngCom->frameSize / 4; i++ ) + { + olapBuffer[i] = fftBuffer[i]; + } + } + ELSE + { + FOR( i = hFdCngCom->frameSize / 4; i < 3 * hFdCngCom->frameSize / 4; i++ ) + { + olapBuffer[i] = L_add( olapBuffer[i], Mpy_32_16_1( fftBuffer[i], olapWin[i - hFdCngCom->frameSize / 4] ) ); + } + FOR( ; i < 5 * hFdCngCom->frameSize / 4; i++ ) + { + olapBuffer[i] = fftBuffer[i]; + } + } + FOR( ; i < 7 * hFdCngCom->frameSize / 4; i++ ) + { + olapBuffer[i] = Mpy_32_16_1( fftBuffer[i], olapWin[i - 3 * hFdCngCom->frameSize / 4] ); + } + + FOR( ; i < hFdCngCom->fftlen; i++ ) + { + olapBuffer[i] = 0; + } + + Word32 fftScale = 0; + SWITCH( hFdCngCom->fftlen ) + { + case 640: + fftScale = FFT_SCALING_640; + break; + case 512: + fftScale = FFT_SCALING_512; + break; + default: + assert( !"Not supported FFT length!" ); + } + /* Get time-domain signal */ + // v_multc(olapBuffer + hFdCngCom->frameSize / 4, (float)(hFdCngCom->fftlen / 2), timeDomainOutput, hFdCngCom->frameSize); + v_multc_fixed( olapBuffer + hFdCngCom->frameSize / 4, fftScale, timeDomainOutput, hFdCngCom->frameSize ); // Q_in - 9 + /* Get excitation */ + IF( ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_DFT ) && nchan_out == 2 ) + { + FOR( i = 0; i < hFdCngCom->frameSize / 2; i++ ) + { + buf_fx[i + ( M + 1 )] = L_add( buf_fx[i + ( M + 1 )], olapBuffer[i + hFdCngCom->frameSize / 4] ); + } + // v_multc(buf, (float)(hFdCngCom->fftlen / 2), buf, M + 1 + hFdCngCom->frameSize); + v_multc_fixed( buf_fx, fftScale, buf_fx, M + 1 + hFdCngCom->frameSize ); + } + ELSE + { + // v_multc(olapBuffer + hFdCngCom->frameSize / 4 - (M + 1), (float)(hFdCngCom->fftlen / 2), buf, M + 1 + hFdCngCom->frameSize); + v_multc_fixed( olapBuffer + ( hFdCngCom->frameSize / 4 ) - ( M + 1 ), fftScale, buf_fx, M + 1 + hFdCngCom->frameSize ); + } + + tmp_fx = buf_fx[0]; + // preemph(buf + 1, PREEMPH_FAC_FLT, M + hFdCngCom->frameSize, &tmp); + preemph_ivas_fx( buf_fx + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp_fx ); + // 
residu(hFdCngCom->A_cng_flt, M, buf + 1 + M, hFdCngCom->exc_cng_flt, hFdCngCom->frameSize); + residu_ivas_fx( hFdCngCom->A_cng, Q13, M, buf_fx + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); + for ( i = 0; i < hFdCngCom->frameSize; i++ ) + { + hFdCngCom->exc_cng_flt[i] = fix_to_float( hFdCngCom->exc_cng_32fx[i], Q_in - 9 ); + } + + return; +} +#endif /*------------------------------------------------------------------- * SynthesisSTFT_dirac_flt() @@ -1112,6 +1232,9 @@ void lpc_from_spectrum_flt( int16_t fftlen = hFdCngCom->fftlen; const float *fftSineTab = hFdCngCom->fftSineTab_flt; float *A = hFdCngCom->A_cng_flt; +#ifdef IVAS_FLOAT_FIXED + Word16 *A_fx = hFdCngCom->A_cng; +#endif /* Power Spectrum */ ptr = fftBuffer; @@ -1165,7 +1288,12 @@ void lpc_from_spectrum_flt( /* LPC */ lev_dur( A, r, M, NULL ); - +#ifdef IVAS_FLOAT_FIXED + for ( i = 0; i < M + 1; i++ ) + { + A_fx[i] = float_to_fix16( A[i], Q13 ); + } +#endif return; } diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index b43355c63..33e301c82 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -50,6 +50,8 @@ #define _180_OVER_PI_Q25 1922527233 #define PI_OVER_4_Q29 421657440 #define PI_OVER_Q29 1686629760 +#define Q31_0_99 2126008811 +#define Q31_0_01 21474836 #endif #define SQRT2 1.414213562373095f diff --git a/lib_com/preemph.c b/lib_com/preemph.c index 024ad379c..2ad07ad70 100644 --- a/lib_com/preemph.c +++ b/lib_com/preemph.c @@ -66,3 +66,32 @@ void preemph( return; } + + +/*-------------------------------------------------------------* + * preemph_ivas_fx() + * + * Preemphasis: filtering through 1 - mu z^-1 + *-------------------------------------------------------------*/ + +void preemph_ivas_fx( + Word32 *signal, /* i/o: signal */ + const Word16 mu, /* i : preemphasis factor */ + const Word16 L, /* i : vector size */ + Word32 *mem /* i/o: memory (x[-1]) */ +) +{ + Word16 i; + Word32 temp; + + temp = signal[L - 1]; + FOR( i = L - 1; i > 0; i-- ) + { + signal[i] = L_sub( 
signal[i], Mpy_32_16_1( signal[i - 1], mu ) ); + } + + signal[0] = L_sub( signal[0], Mpy_32_16_1( *mem, mu ) ); + *mem = temp; + + return; +} \ No newline at end of file diff --git a/lib_com/prot.h b/lib_com/prot.h index 12f67086e..1b9441740 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -813,7 +813,12 @@ void preemph( const int16_t L, /* i : vector size */ float *mem /* i/o: memory (x[-1]) */ ); - +void preemph_ivas_fx( + Word32 *signal, /* i/o: signal */ + const Word16 mu, /* i : preemphasis factor */ + const Word16 L, /* i : vector size */ + Word32 *mem /* i/o: memory (x[-1]) */ +); void cb_shape( const int16_t preemphFlag, /* i : flag for pre-emphasis */ const int16_t pitchFlag, /* i : flag for pitch sharpening */ @@ -1102,6 +1107,15 @@ void residu( const int16_t l /* i : size of filtering */ ); +void residu_ivas_fx( + const Word16 *a, /* i : LP filter coefficients */ + const Word16 a_exp, + const Word16 m, /* i : order of LP filter */ + const Word32 *x, /* i : input signal (usually speech) */ + Word32 *y, /* o : output signal (usually residual) */ + const Word16 l /* i : size of filtering */ +); + void calc_residu( const float *speech, /* i : weighted speech signal */ float *res, /* o : residual signal */ @@ -8635,6 +8649,21 @@ void generate_masking_noise_flt( const int16_t nchan_out /* i : number of output channels */ ); +#ifdef IVAS_FLOAT_FIXED +void generate_masking_noise_ivas_fx( + Word32 *timeDomainBuffer, /* i/o: time-domain signal */ + Word16 *exp_out, /* o : time-domain signal exp */ + HANDLE_FD_CNG_COM hFdCngCom, /* i/o: FD_CNG structure containing all buffers and variables */ + const int16_t length, /* i : frame size */ + const int16_t core, /* i : core */ + const int16_t return_noise, /* i : noise is returned instead of added */ + const int16_t secondary, /* i : flag to indicate secondary noise generation */ + const int16_t element_mode, /* i : element mode */ + STEREO_CNG_DEC_HANDLE hStereoCng, /* i : stereo CNG handle */ + const int16_t 
nchan_out /* i : number of output channels */ +); +#endif + void generate_masking_noise_update_seed_flt( HANDLE_FD_CNG_COM hFdCngCom /* i/o: FD_CNG structure containing all buffers and variables */ ); @@ -8674,6 +8703,18 @@ void generate_stereo_masking_noise( const int16_t nchan_out /* i : number of output channels */ ); +#ifdef IVAS_FLOAT_FIXED +void generate_stereo_masking_noise_fx( + float *syn, /* i/o: time-domain signal */ + Decoder_State *st, /* i/o: decoder state structure */ + STEREO_TD_DEC_DATA_HANDLE hStereoTD, /* i : TD stereo structure */ + const int16_t flag_sec_CNA, /* i : CNA flag for secondary channel */ + const int16_t fadeOut, /* i : only fade out of previous state */ + STEREO_CNG_DEC_HANDLE hStereoCng, /* i : stereo CNG handle */ + const int16_t nchan_out /* i : number of output channels */ +); +#endif + void apply_scale_flt( float *scale, /* i : scale factor */ const int16_t bwidth, /* i : audio bandwidth */ @@ -8728,6 +8769,20 @@ void SynthesisSTFT_flt( const int16_t nchan_out /* i : number of output channels */ ); +#ifdef IVAS_FLOAT_FIXED +void SynthesisSTFT_fx( + Word32 *fftBuffer, /* i : FFT bins */ + Word16 Q_in, + Word32 *timeDomainOutput, + Word32 *olapBuffer, + const Word16 *olapWin, + const int16_t tcx_transition, + HANDLE_FD_CNG_COM hFdCngCom, /* i/o: FD_CNG structure containing all buffers and variables */ + const int16_t element_mode, /* i : element mode */ + const int16_t nchan_out /* i : number of output channels */ +); +#endif + float rand_gauss_flt( float *x, int16_t *seed ); diff --git a/lib_com/residu.c b/lib_com/residu.c index a46001c26..e86d448f6 100644 --- a/lib_com/residu.c +++ b/lib_com/residu.c @@ -71,6 +71,36 @@ void residu( return; } +/*--------------------------------------------------------------------* + * residu_ivas_fx() + * + * Compute the LP residual by filtering the input speech through A(z) + *--------------------------------------------------------------------*/ + +void residu_ivas_fx( + const Word16 *a, /* i 
: LP filter coefficients */ + const Word16 a_exp, + const Word16 m, /* i : order of LP filter */ + const Word32 *x, /* i : input signal (usually speech) */ + Word32 *y, /* o : output signal (usually residual) */ + const Word16 l /* i : size of filtering */ +) +{ + Word32 s; + int16_t i, j; + + for ( i = 0; i < l; i++ ) + { + s = x[i]; + for ( j = 1; j <= m; j++ ) + { + s = L_add( s, L_shl( Mpy_32_16_1( x[i - j], a[j] ), Q15 - a_exp ) ); + } + y[i] = s; + } + + return; +} /*--------------------------------------------------------------------* * calc_residu() * diff --git a/lib_com/rom_com.c b/lib_com/rom_com.c index 9dc82a9c3..d920b76e4 100644 --- a/lib_com/rom_com.c +++ b/lib_com/rom_com.c @@ -10739,6 +10739,12 @@ const Word16 scaleTable_cn_only_amrwbio[3][2] = { ACELP_8k85, 16306/*0.9952622652 Q14*/ }, { ACELP_12k65, 9583/*0.5848932266 Q14*/ }, }; +const Word32 scaleTable_cn_only_amrwbio_fx_by_10f[SIZE_SCALE_TABLE_CN_AMRWB][2] = +{ + { ACELP_6k60, 858993459 }, + { ACELP_8k85, 644245094 }, + { ACELP_12k65, 429496729 } +}; const int16_t sidparts_encoder_noise_est[SIZE_SIDPARTS_ENC_NOISE_EST] = { 4, 8, 12, 16, 20, 24, 30, 36, 42, 50, 58, 68, 80, 92, 108, 126, 148, 176, 212, 255, 259, 264, 269, 279 }; @@ -13352,6 +13358,26 @@ const float olapWinSyn256[256] = 0.0184067299f, 0.0061358846f }; +const Word16 olapWinSyn256_fx[256] = +{ + 201, 603, 1005, 1407, 1808, 2210, 2611, 3011, 3411, 3811, 4210, 4609, 5006, 5403, 5800, 6195, + 6589, 6983, 7375, 7766, 8156, 8545, 8933, 9319, 9704, 10087, 10469, 10849, 11228, 11605, 11980, 12353, + 12725, 13094, 13462, 13828, 14191, 14552, 14912, 15269, 15623, 15976, 16325, 16673, 17018, 17360, 17700, 18037, + 18371, 18703, 19032, 19358, 19681, 20001, 20318, 20631, 20942, 21250, 21555, 21856, 22154, 22448, 22740, 23027, + 23312, 23593, 23870, 24144, 24414, 24680, 24943, 25201, 25457, 25708, 25955, 26199, 26438, 26674, 26905, 27133, + 27356, 27576, 27791, 28002, 28208, 28411, 28609, 28803, 28993, 29178, 29359, 29535, 29707, 29874, 30037, 
30196, + 30350, 30499, 30644, 30784, 30919, 31050, 31176, 31298, 31414, 31526, 31634, 31736, 31834, 31927, 32015, 32098, + 32176, 32250, 32319, 32383, 32442, 32496, 32545, 32589, 32629, 32663, 32693, 32718, 32737, 32752, 32762, 32767, + 32767, 32762, 32752, 32737, 32718, 32693, 32663, 32629, 32589, 32545, 32496, 32442, 32383, 32319, 32250, 32176, + 32098, 32015, 31927, 31834, 31736, 31634, 31526, 31414, 31298, 31176, 31050, 30919, 30784, 30644, 30499, 30350, + 30196, 30037, 29874, 29707, 29535, 29359, 29178, 28993, 28803, 28609, 28411, 28208, 28002, 27791, 27576, 27356, + 27133, 26905, 26674, 26438, 26199, 25955, 25708, 25457, 25201, 24943, 24680, 24414, 24144, 23870, 23593, 23312, + 23027, 22740, 22448, 22154, 21856, 21555, 21250, 20942, 20631, 20318, 20001, 19681, 19358, 19032, 18703, 18371, + 18037, 17700, 17360, 17018, 16673, 16325, 15976, 15623, 15269, 14912, 14552, 14191, 13828, 13462, 13094, 12725, + 12353, 11980, 11605, 11228, 10849, 10469, 10087, 9704, 9319, 8933, 8545, 8156, 7766, 7375, 6983, 6589, + 6195, 5800, 5403, 5006, 4609, 4210, 3811, 3411, 3011, 2611, 2210, 1808, 1407, 1005, 603, 201 +}; + const float olapWinSyn320[320] = { 0.0049087191000580788000000f, 0.0147256832569837570000000f, 0.0245412290096282960000000f, 0.0343544110655784610000000f, 0.0441642776131629940000000f, 0.0539698898792266850000000f, 0.0637703016400337220000000f, 0.0735645666718482970000000f, 0.0833517387509346010000000f, 0.0931308791041374210000000f, @@ -13388,7 +13414,41 @@ const float olapWinSyn320[320] = 0.0931307896971702580000000f, 0.0833516493439674380000000f, 0.0735644772648811340000000f, 0.0637702122330665590000000f, 0.0539698041975498200000000f, 0.0441641919314861300000000f, 0.0343543216586112980000000f, 0.0245411414653062820000000f, 0.0147255966439843180000000f, 0.0049086315557360649000000f }; - +const Word16 olapWinSyn320_fx[320] = +{ + 160, 482, 804, 1125, 1447, 1768, 2089, 2410, 2731, 3051, + 3371, 3691, 4011, 4330, 4648, 4967, 5284, 5602, 5918, 6234, + 6550, 6865, 
7179, 7493, 7805, 8117, 8429, 8739, 9049, 9358, + 9665, 9972, 10278, 10583, 10887, 11190, 11492, 11793, 12092, 12391, + 12688, 12984, 13278, 13572, 13864, 14155, 14444, 14732, 15019, 15304, + 15588, 15870, 16151, 16430, 16707, 16983, 17258, 17530, 17801, 18070, + 18338, 18604, 18868, 19130, 19390, 19648, 19905, 20159, 20412, 20663, + 20911, 21158, 21403, 21645, 21886, 22124, 22360, 22594, 22826, 23056, + 23283, 23509, 23732, 23952, 24171, 24387, 24600, 24812, 25021, 25227, + 25431, 25633, 25832, 26029, 26223, 26415, 26604, 26790, 26974, 27155, + 27334, 27510, 27684, 27854, 28023, 28188, 28351, 28511, 28668, 28822, + 28974, 29123, 29269, 29412, 29552, 29690, 29825, 29956, 30085, 30211, + 30334, 30455, 30572, 30686, 30797, 30906, 31011, 31114, 31213, 31309, + 31403, 31493, 31581, 31665, 31746, 31824, 31899, 31971, 32040, 32106, + 32169, 32229, 32285, 32339, 32389, 32436, 32480, 32521, 32559, 32594, + 32625, 32653, 32679, 32701, 32720, 32736, 32748, 32758, 32764, 32767, + 32767, 32764, 32758, 32748, 32736, 32720, 32701, 32679, 32653, 32625, + 32594, 32559, 32521, 32480, 32436, 32389, 32339, 32285, 32229, 32169, + 32106, 32040, 31971, 31899, 31824, 31746, 31665, 31581, 31493, 31403, + 31309, 31213, 31114, 31011, 30906, 30797, 30686, 30572, 30455, 30334, + 30211, 30085, 29956, 29825, 29690, 29552, 29412, 29269, 29123, 28974, + 28822, 28668, 28511, 28351, 28188, 28023, 27854, 27684, 27510, 27334, + 27155, 26974, 26790, 26604, 26415, 26223, 26029, 25832, 25633, 25431, + 25227, 25021, 24812, 24600, 24387, 24171, 23952, 23732, 23509, 23283, + 23056, 22826, 22594, 22360, 22124, 21886, 21645, 21403, 21158, 20911, + 20663, 20412, 20159, 19905, 19648, 19390, 19130, 18868, 18604, 18338, + 18070, 17801, 17530, 17258, 16983, 16707, 16430, 16151, 15870, 15588, + 15304, 15019, 14732, 14444, 14155, 13864, 13572, 13278, 12984, 12688, + 12391, 12092, 11793, 11492, 11190, 10887, 10583, 10278, 9972, 9665, + 9358, 9049, 8739, 8429, 8117, 7805, 7493, 7179, 6865, 6550, + 6234, 5918, 5602, 
5284, 4967, 4648, 4330, 4011, 3691, 3371, + 3051, 2731, 2410, 2089, 1768, 1447, 1125, 804, 482, 160 +}; /*-------------------------------------------------------------------* * AMR-WB ISF codebook - common 1st stage for 46bit and 36bit codebooks, diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 7d680e9aa..36bd729de 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1670,6 +1670,7 @@ extern const SCALE_SETUP scaleTable_cn_only[SIZE_SCALE_TABLE_CN]; extern const SCALE_SETUP scaleTable_cn_dirac[15]; extern const float scaleTable_cn_only_amrwbio_flt[SIZE_SCALE_TABLE_CN_AMRWB][2]; extern const Word16 scaleTable_cn_only_amrwbio[SIZE_SCALE_TABLE_CN_AMRWB][2]; +extern const Word32 scaleTable_cn_only_amrwbio_fx_by_10f[SIZE_SCALE_TABLE_CN_AMRWB][2]; extern const int16_t sidparts_encoder_noise_est[SIZE_SIDPARTS_ENC_NOISE_EST]; @@ -1694,7 +1695,9 @@ extern const float olapWinAna512[512]; extern const float olapWinAna640[640]; extern const float olapWinSyn256[256]; +extern const Word16 olapWinSyn256_fx[256]; extern const float olapWinSyn320[320]; +extern const Word16 olapWinSyn320_fx[320]; #ifdef ERI_FDCNGVQ_LOW_ROM_TESTING extern const float *const cdk_37bits_ivas_orig[]; diff --git a/lib_com/stat_com.h b/lib_com/stat_com.h index 521ac037c..3f889cd8c 100644 --- a/lib_com/stat_com.h +++ b/lib_com/stat_com.h @@ -399,10 +399,22 @@ typedef struct float fftBuffer_flt[FFTLEN]; float olapBufferAna_flt[FFTLEN]; float olapBufferSynth_flt[FFTLEN]; +#ifdef IVAS_FLOAT_FIXED + Word32 olapBufferSynth_fx[FFTLEN]; +#endif float olapBufferSynth2_flt[FFTLEN]; +#ifdef IVAS_FLOAT_FIXED + Word32 olapBufferSynth2_fx[FFTLEN]; +#endif const float *olapWinAna_flt; const float *olapWinSyn_flt; +#ifdef IVAS_FLOAT_FIXED + const Word16 *olapWinSyn_fx; +#endif const float *fftSineTab_flt; +#ifdef IVAS_FLOAT_FIXED + const Word16 *fftSineTab_fx; +#endif Word32 fftBuffer[FFTLEN]; Word16 *olapBufferAna; /* points to FD_CNG_DEC->olapBufferAna[320] in case of decoder */ Word16 
olapBufferSynth[FFTLEN]; @@ -489,12 +501,18 @@ typedef struct float exc_cng_flt[L_FRAME16k]; Word16 A_cng[M + 1]; Word16 exc_cng[L_FRAME16k]; +#ifdef IVAS_FLOAT_FIXED + Word32 exc_cng_32fx[L_FRAME16k]; +#endif int32_t CngBitrate; int16_t CngBandwidth; int16_t flag_noisy_speech; float likelihood_noisy_speech_flt; +#ifdef IVAS_FLOAT_FIXED + Word32 likelihood_noisy_speech_32fx; +#endif Word16 likelihood_noisy_speech; float coherence_flt; /* inter-channel coherence of noise */ diff --git a/lib_dec/acelp_core_dec.c b/lib_dec/acelp_core_dec.c index 415d81bb1..af8ec5dbc 100644 --- a/lib_dec/acelp_core_dec.c +++ b/lib_dec/acelp_core_dec.c @@ -1116,6 +1116,10 @@ ivas_error acelp_core_dec( noisy_speech_detection_flt( st->hFdCngDec, st->VAD, syn ); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech; +#ifdef IVAS_FLOAT_FIXED + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), + st->hFdCngDec->hFdCngCom->flag_noisy_speech * Q31_0_01 ); +#endif } if ( st->idchan == 0 ) @@ -1138,7 +1142,11 @@ ivas_error acelp_core_dec( { if ( hStereoCng->flag_cna_fade ) { +#ifndef IVAS_FLOAT_FIXED generate_stereo_masking_noise( syn, st, hStereoTD, flag_sec_CNA, 1, hStereoCng, nchan_out ); +#else + generate_stereo_masking_noise_fx( syn, st, hStereoTD, flag_sec_CNA, 1, hStereoCng, nchan_out ); +#endif hStereoCng->flag_cna_fade = 0; } else @@ -1149,7 +1157,11 @@ ivas_error acelp_core_dec( set_f( hStereoCng->olapBufferSynth22, 0.0f, st->hFdCngDec->hFdCngCom->frameSize / 2 ); } +#ifndef IVAS_FLOAT_FIXED generate_stereo_masking_noise( syn, st, hStereoTD, flag_sec_CNA, 0, hStereoCng, nchan_out ); +#else + generate_stereo_masking_noise_fx( syn, st, hStereoTD, flag_sec_CNA, 0, hStereoCng, nchan_out ); +#endif } } else if ( st->element_mode != IVAS_CPE_DFT ) @@ -1168,7 +1180,11 @@ 
ivas_error acelp_core_dec( { if ( st->element_mode == IVAS_CPE_TD && nchan_out == 2 ) { +#ifndef IVAS_FLOAT_FIXED generate_stereo_masking_noise( syn, st, hStereoTD, flag_sec_CNA, 1, hStereoCng, nchan_out ); +#else + generate_stereo_masking_noise_fx( syn, st, hStereoTD, flag_sec_CNA, 1, hStereoCng, nchan_out ); +#endif hStereoCng->flag_cna_fade = 1; } else diff --git a/lib_dec/amr_wb_dec.c b/lib_dec/amr_wb_dec.c index 41c265c84..cc59d4802 100644 --- a/lib_dec/amr_wb_dec.c +++ b/lib_dec/amr_wb_dec.c @@ -627,6 +627,10 @@ ivas_error amr_wb_dec_flt( noisy_speech_detection_flt( st->hFdCngDec, st->VAD, syn ); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech; +#ifdef IVAS_FLOAT_FIXED + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), + st->hFdCngDec->hFdCngCom->flag_noisy_speech * Q31_0_01 ); +#endif st->lp_noise_float = st->hFdCngDec->lp_noise_float; if ( st->flag_cna && ( st->psf_lp_noise >= 15.f ) ) diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index 69b2204a2..bb1283be1 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -53,11 +53,12 @@ *-------------------------------------------------------------------*/ #define DELTA_MASKING_NOISE 1e-20f +#define DELTA_MASKING_NOISE_Q31 0 #define CNA_ACT_DN_LARGE_PARTITION 50 /* index of the first larger partition */ #define ST_PERIODOG_FACT 0.9 /* short-term filter factor for periodogram */ #define CNA_ACT_DN_FACT 0.7 /* downward updating factor for CNA during active frames */ #define FIRST_CNA_NOISE_UPD_FRAMES 5 /* minimum number of CN initialization frames */ - +#define LOG_10_BASE_2 1783446566 /* Q29 */ /*------------------------------------------------------------------- * Local fucntions declarations @@ -328,11 +329,19 @@ void configureFdCngDec_flt( hsCom->fftSineTab_flt = NULL; 
hsCom->olapWinAna_flt = olapWinAna512; hsCom->olapWinSyn_flt = olapWinSyn256; +#ifdef IVAS_FLOAT_FIXED + hsCom->fftSineTab_fx = NULL; + hsCom->olapWinSyn_fx = olapWinSyn256_fx; +#endif break; case 640: hsCom->fftSineTab_flt = fftSineTab640; hsCom->olapWinAna_flt = olapWinAna640; hsCom->olapWinSyn_flt = olapWinSyn320; +#ifdef IVAS_FLOAT_FIXED + hsCom->fftSineTab_fx = fftSineTab640_fx; + hsCom->olapWinSyn_fx = olapWinSyn320_fx; +#endif break; default: assert( !"Unsupported FFT length for FD-based CNG" ); @@ -1202,8 +1211,30 @@ void generate_comfort_noise_dec_flt( } /* Perform STFT synthesis */ +#ifndef IVAS_FLOAT_FIXED SynthesisSTFT_flt( fftBuffer, timeDomainOutput, hFdCngCom->olapBufferSynth_flt, hFdCngCom->olapWinSyn_flt, tcx_transition, hFdCngCom, st->element_mode, nchan_out ); - +#else + Word32 fftBuffer_fx[FFTLEN], timeDomainBuffer_fx[L_FRAME16k]; + Word16 exp = Q15; + for ( i = 0; i < hFdCngCom->fftlen; i++ ) + { + fftBuffer_fx[i] = float_to_fix( fftBuffer[i], exp ); + } + for ( i = 0; i < hFdCngCom->fftlen; i++ ) + { + hFdCngCom->olapBufferSynth_fx[i] = float_to_fix( hFdCngCom->olapBufferSynth_flt[i], exp ); + } + SynthesisSTFT_fx( fftBuffer_fx, exp, timeDomainBuffer_fx, hFdCngCom->olapBufferSynth_fx, hFdCngCom->olapWinSyn_fx, tcx_transition, hFdCngCom, st->element_mode, nchan_out ); + for ( i = 0; i < hFdCngCom->fftlen; i++ ) + { + hFdCngCom->olapBufferSynth_flt[i] = fix_to_float( hFdCngCom->olapBufferSynth_fx[i], exp ); + } + exp = exp - 9; + for ( i = 0; i < hFdCngCom->frameSize; i++ ) + { + timeDomainOutput[i] = fix_to_float( timeDomainBuffer_fx[i], exp ); + } +#endif /* update CNG excitation energy for LP_CNG */ /* calculate the residual signal energy */ @@ -1445,6 +1476,7 @@ void generate_masking_noise_flt( const int16_t nchan_out /* i : number of output channels */ ) { +#ifndef IVAS_FLOAT_FIXED float *cngNoiseLevel_flt = hFdCngCom->cngNoiseLevel_flt; float *ptr_level = cngNoiseLevel_flt; float *fftBuffer = hFdCngCom->fftBuffer_flt; @@ -1560,8 +1592,211 
#ifdef IVAS_FLOAT_FIXED
/*-------------------------------------------------------------------
 * generate_masking_noise_ivas_fx()
 *
 * Generate additional comfort noise (kind of noise filling).
 * Fixed-point counterpart of generate_masking_noise_flt():
 *   1. the float noise levels (cngNoiseLevel_flt) are converted to a
 *      block-normalised Word32 copy (hFdCngCom->cngNoiseLevel),
 *   2. the CN scale is derived from bitrate/bandwidth and weighted by
 *      likelihood_noisy_speech_32fx,
 *   3. Gaussian noise is drawn per FFT bin and shaped by
 *      sqrt( scale * level ),
 *   4. SynthesisSTFT_fx() turns the bins into time-domain noise, which
 *      is either returned or added to timeDomainBuffer.
 *
 * The overlap memories are still owned by the float code: they are
 * converted to fixed point before and back to float after synthesis.
 *-------------------------------------------------------------------*/

void generate_masking_noise_ivas_fx(
    Word32 *timeDomainBuffer,     /* i/o: time-domain signal; min( frameSize, length ) samples are written/updated */
    Word16 *exp_out,              /* o  : Q format of the generated noise (Q15 - 9 on return)                    */
    HANDLE_FD_CNG_COM hFdCngCom,  /* i/o: FD_CNG structure containing all buffers and variables                  */
    const int16_t length,         /* i  : frame size                                                             */
    const int16_t core,           /* i  : core                                                                   */
    const int16_t return_noise,   /* i  : noise is returned instead of added                                     */
    const int16_t secondary,      /* i  : flag to indicate secondary noise generation                            */
    const int16_t element_mode,   /* i  : element mode                                                           */
    STEREO_CNG_DEC_HANDLE hStereoCng, /* i  : stereo CNG handle (overlap memory used when secondary != 0)        */
    const int16_t nchan_out       /* i  : number of output channels                                              */
)
{
    float *cngNoiseLevel_flt = hFdCngCom->cngNoiseLevel_flt;
    Word32 max_cngNoiseLevel = 0;
    Word32 *cngNoiseLevel_fx = hFdCngCom->cngNoiseLevel;
    Word16 noise_exp;
    Word32 *ptr_level_fx = cngNoiseLevel_fx;
    Word32 *fftBuffer_fx = hFdCngCom->fftBuffer;
    Word16 i;
    Word32 maskingNoise_fx[L_FRAME16k];
    Word32 *ptr_r_fx;
    Word32 *ptr_i_fx;
    Word16 startBand = hFdCngCom->startBand;
    Word16 *seed = &( hFdCngCom->seed );
    Word32 scale_fx = 0x40000000; // 1.0 in Q30

    /* Block-normalise the float noise levels: noise_exp is the headroom of the largest integer part */
    FOR( i = 0; i < FFTCLDFBLEN; i++ )
    {
        max_cngNoiseLevel = L_max( L_abs( (Word32) cngNoiseLevel_flt[i] ), max_cngNoiseLevel );
    }
    noise_exp = norm_l( max_cngNoiseLevel );
    IF( max_cngNoiseLevel == 0 )
    {
        /* norm_l( 0 ) returns 0, which would convert purely fractional levels (all < 1.0)
           in Q0 and wipe them out. Q30 holds every value below 1.0 without overflow and
           the 32 - noise_exp exponent bookkeeping below stays valid. */
        noise_exp = 30;
    }
    FOR( i = 0; i < FFTCLDFBLEN; i++ )
    {
        cngNoiseLevel_fx[i] = float_to_fix( cngNoiseLevel_flt[i], noise_exp );
    }

    /* skip noise generating if level is very low, to avoid problems with possibly running into denormals */
    *exp_out = Q15;
    IF( hFdCngCom->likelihood_noisy_speech_32fx > DELTA_MASKING_NOISE_Q31 )
    {
        IF( core != AMR_WB_CORE )
        {
            /* Compute additional CN level */
            FOR( i = 0; i < SIZE_SCALE_TABLE_CN; i++ )
            {
                IF( ( hFdCngCom->CngBandwidth == scaleTable_cn_only[i].bwmode ) &&
                    ( hFdCngCom->CngBitrate >= scaleTable_cn_only[i].bitrateFrom ) &&
                    ( hFdCngCom->CngBitrate < scaleTable_cn_only[i].bitrateTo ) )
                {
                    break;
                }
            }

            /* NOTE(review): i is used without an i < SIZE_SCALE_TABLE_CN check (the AMR-WB branch
               below has one) - confirm every bandwidth/bitrate pair is covered by the table. */

            /* scale = 10^( -scale_flt / 10 ) - 1, evaluated as 2^( x * log2( 10 ) ) */
            Word16 exp;
            Word32 scale_temp = BASOP_util_Pow2( Mpy_32_32( float_to_fix( -scaleTable_cn_only[i].scale_flt / 10.f, Q31 ), LOG_10_BASE_2 ), Q2, &exp );
            scale_temp = L_sub( scale_temp, L_shl( 1, Q31 - exp ) ); /* subtract 1.0 in the Q of scale_temp */
            scale_fx = L_shl( scale_temp, exp - Q1 );                // Q30
        }
        ELSE
        {
            /* Compute additional CN level */
            FOR( i = 0; i < SIZE_SCALE_TABLE_CN_AMRWB; i++ )
            {
                IF( hFdCngCom->CngBitrate >= scaleTable_cn_only_amrwbio_flt[i][0] )
                {
                    break;
                }
            }

            IF( i < SIZE_SCALE_TABLE_CN_AMRWB )
            {
                Word16 exp;
                Word32 scale_temp = BASOP_util_Pow2( Mpy_32_32( scaleTable_cn_only_amrwbio_fx_by_10f[i][1], LOG_10_BASE_2 ), Q2, &exp );
                scale_temp = L_sub( scale_temp, L_shl( 1, Q31 - exp ) ); /* subtract 1.0 in the Q of scale_temp */
                scale_fx = L_shl( Mpy_32_32( scale_fx, scale_temp ), exp ); // Q30
            }
            ELSE
            {
                /* bitrate below the table: no additional comfort noise */
                scale_fx = 0;
            }
        }

        /* Exclude clean speech */
        scale_fx = Mpy_32_32( scale_fx, hFdCngCom->likelihood_noisy_speech_32fx ); // Q30

        /* Generate Gaussian random noise in real and imaginary parts of the FFT bins
           Amplitudes are adjusted to the estimated noise level cngNoiseLevel in each bin */
        IF( startBand == 0 )
        {
            rand_gauss_fx( &fftBuffer_fx[0], seed, *exp_out ); // Q15
            ptr_r_fx = fftBuffer_fx + 2;
            Word16 exp1 = 32 - noise_exp;
            Word32 mpy1 = Sqrt32( Mpy_32_32( scale_fx, *ptr_level_fx ), &exp1 ); // Q = noise_exp-1
            mpy1 = L_shl( mpy1, exp1 );                                           // Q31
            fftBuffer_fx[0] = Mpy_32_32( fftBuffer_fx[0], mpy1 ); /* DC component in FFT */ // Q = Q15
            ptr_level_fx++;
        }
        ELSE
        {
            fftBuffer_fx[0] = 0;
            set_l( fftBuffer_fx + 2, 0, 2 * ( startBand - 1 ) );
            ptr_r_fx = fftBuffer_fx + 2 * startBand;
        }
        ptr_i_fx = ptr_r_fx + 1;
        FOR( ; ptr_level_fx < cngNoiseLevel_fx + hFdCngCom->stopFFTbin - startBand; ptr_level_fx++ )
        {
            /* Real part in FFT bins */
            rand_gauss_fx( ptr_r_fx, seed, *exp_out ); // Q15
            Word16 exp2 = 32 - noise_exp;
            /* the extra right shift halves the level: real and imaginary part share the bin energy */
            Word32 mpy2 = Sqrt32( L_shr( Mpy_32_32( scale_fx, *ptr_level_fx ), 1 ), &exp2 ); // Q = noise_exp-1
            ( *ptr_r_fx ) = L_shl( Mpy_32_32( *ptr_r_fx, mpy2 ), exp2 );                      // Q = Q15
            ptr_r_fx += 2;

            /* Imaginary part in FFT bins */
            rand_gauss_fx( ptr_i_fx, seed, *exp_out );                   // Q15
            ( *ptr_i_fx ) = L_shl( Mpy_32_32( *ptr_i_fx, mpy2 ), exp2 ); // Q = Q15
            ptr_i_fx += 2;
        }

        /* Remaining FFT bins are set to zero */
        set_l( fftBuffer_fx + 2 * hFdCngCom->stopFFTbin, 0, hFdCngCom->fftlen - 2 * hFdCngCom->stopFFTbin );
        /* Nyquist frequency is discarded */
        fftBuffer_fx[1] = 0;
    }
    ELSE
    {
        /* very low level case - update random seeds and reset FFT buffer; don't fully skip SynthesisSTFT_fx(), because of the buffer updates done there... */
        generate_masking_noise_update_seed( hFdCngCom );

        set_l( fftBuffer_fx, 0, hFdCngCom->fftlen );
    }

    /* Perform STFT synthesis; the overlap memory is converted float -> fixed -> float around the call */
    IF( secondary )
    {
        FOR( i = 0; i < hFdCngCom->fftlen; i++ )
        {
            hStereoCng->olapBufferSynth22_32fx[i] = float_to_fix( hStereoCng->olapBufferSynth22[i], *exp_out );
        }
        SynthesisSTFT_fx( fftBuffer_fx, *exp_out, maskingNoise_fx, hStereoCng->olapBufferSynth22_32fx, hFdCngCom->olapWinSyn_fx, 0, hFdCngCom, element_mode, nchan_out );
        FOR( i = 0; i < hFdCngCom->fftlen; i++ )
        {
            hStereoCng->olapBufferSynth22[i] = fix_to_float( hStereoCng->olapBufferSynth22_32fx[i], *exp_out );
        }
    }
    ELSE
    {
        FOR( i = 0; i < hFdCngCom->fftlen; i++ )
        {
            hFdCngCom->olapBufferSynth2_fx[i] = float_to_fix( hFdCngCom->olapBufferSynth2_flt[i], *exp_out );
        }
        SynthesisSTFT_fx( fftBuffer_fx, *exp_out, maskingNoise_fx, hFdCngCom->olapBufferSynth2_fx, hFdCngCom->olapWinSyn_fx, 0, hFdCngCom, element_mode, nchan_out );
        FOR( i = 0; i < hFdCngCom->fftlen; i++ )
        {
            hFdCngCom->olapBufferSynth2_flt[i] = fix_to_float( hFdCngCom->olapBufferSynth2_fx[i], *exp_out );
        }
    }
    /* SynthesisSTFT_fx() delivers its output 9 bits below the input Q */
    *exp_out = *exp_out - 9;

    /* Add some comfort noise on top of decoded signal */
    IF( return_noise )
    {
        mvl2l( maskingNoise_fx, timeDomainBuffer, min( hFdCngCom->frameSize, length ) );
    }
    ELSE
    {
        /* NOTE(review): assumes timeDomainBuffer already carries the Q format reported in *exp_out - confirm at the call sites */
        v_add_fixed( maskingNoise_fx, timeDomainBuffer, timeDomainBuffer, min( hFdCngCom->frameSize, length ), 0 );
    }

    return;
}
#endif
#ifdef IVAS_FLOAT_FIXED
/*-------------------------------------------------------------------
 * generate_stereo_masking_noise_fx()
 *
 * Generate additional comfort noise (kind of noise filling)
 *
 * Partially converted fixed-point variant of generate_stereo_masking_noise().
 * The noise mixing (Np/Ns) is done on Word32 data, while the interface
 * (syn), the overlap buffers and the secondary-channel scaling are still
 * float and are converted at the boundaries with float_to_fix()/fix_to_float().
 *
 * For the primary channel (st->idchan == 0):
 *   - Np/Ns start from the first half of the overlap buffers of the
 *     primary/secondary CNA synthesis, second half zeroed.
 *   - Unless fadeOut is set, fresh masking noise N1 (and N2 when
 *     flag_sec_CNA is set) is generated and mixed into Np/Ns.
 *   - Ns is stored in hStereoCng->maskingNoiseS when flag_sec_CNA is set.
 *   - Np is added to syn.
 * For the other channel, when hStereoCng->enableSecCNA is set, the stored
 * maskingNoiseS is scaled by the long-term SP ratio (cross-faded from the
 * previous ratio over the first quarter frame) and added to syn.
 *-------------------------------------------------------------------*/

void generate_stereo_masking_noise_fx(
    float *syn,                          /* i/o: time-domain signal                  */
    Decoder_State *st,                   /* i/o: decoder state structure             */
    STEREO_TD_DEC_DATA_HANDLE hStereoTD, /* i  : TD stereo structure                 */
    const int16_t flag_sec_CNA,          /* i  : CNA flag for secondary channel      */
    const int16_t fadeOut,               /* i  : only fade out of previous state     */
    STEREO_CNG_DEC_HANDLE hStereoCng,    /* i  : Stereo CNG handle                   */
    const int16_t nchan_out              /* i  : number of output channels           */
)
{
    HANDLE_FD_CNG_COM hFdCngCom;
    Word32 gamma_fx, scale_fx /*, SP_ratio_fx needs to be integrated*/;
    Word32 Np_fx[L_FRAME16k];
    Word32 Ns_fx[L_FRAME16k];
    Word32 N1_fx[L_FRAME16k];
    Word32 N2_fx[L_FRAME16k];
    /* NOTE(review): the exponents returned by generate_masking_noise_ivas_fx() are never read below;
       the mixing assumes N1_fx/N2_fx are Q6 - confirm against the callee. */
    Word16 N1_fx_exp, N2_fx_exp;
    int16_t i;

    IF( st->idchan == 0 )
    {
        hFdCngCom = st->hFdCngDec->hFdCngCom;
        // mvr2r(hStereoCng->olapBufferSynth22, Ns, hFdCngCom->frameSize / 2);
        // mvr2r(hFdCngCom->olapBufferSynth2_flt, Np, hFdCngCom->frameSize / 2);
        for ( i = 0; i < hFdCngCom->frameSize / 2; i++ )
        {
            Ns_fx[i] = float_to_fix( hStereoCng->olapBufferSynth22[i], Q6 );
            Np_fx[i] = float_to_fix( hFdCngCom->olapBufferSynth2_flt[i], Q6 );
        }
        set_l( &Np_fx[hFdCngCom->frameSize / 2], 0, hFdCngCom->frameSize / 2 );
        set_l( &Ns_fx[hFdCngCom->frameSize / 2], 0, hFdCngCom->frameSize / 2 );

        IF( !fadeOut )
        {
            // generate_masking_noise_flt(N1, hFdCngCom, hFdCngCom->frameSize, 0, 1, 0, st->element_mode, hStereoCng, nchan_out);
            generate_masking_noise_ivas_fx( N1_fx, &N1_fx_exp, hFdCngCom, hFdCngCom->frameSize, 0, 1, 0, st->element_mode, hStereoCng, nchan_out ); // N1_fx Q6
            /* Generate masking noise for secondary channel */
            IF( flag_sec_CNA )
            {
                // generate_masking_noise_flt(N2, hFdCngCom, hFdCngCom->frameSize, 0, 1, 1, st->element_mode, hStereoCng, nchan_out);
                generate_masking_noise_ivas_fx( N2_fx, &N2_fx_exp, hFdCngCom, hFdCngCom->frameSize, 0, 1, 1, st->element_mode, hStereoCng, nchan_out ); // N2_fx Q6
                // gamma = hStereoCng->c_PS_LT * hStereoCng->c_PS_LT;
                gamma_fx = float_to_fix( hStereoCng->c_PS_LT * hStereoCng->c_PS_LT, Q30 );
                // scale = 1.0f;
                scale_fx = ONE_IN_Q30;
                // if (gamma < 0.9f)
                IF( gamma_fx < 966367642 ) /* 0.9 in Q30 */
                {
                    // gamma = gamma / (1 - gamma);
                    // gamma = (float)sqrt(gamma + 1) - (float)sqrt(gamma);
                    // scale = 1.0f / (float)sqrt(1 + gamma * gamma);
                    Word16 exp_gamma = 0;
                    Word16 divisor1 = Inv16( (Word16) L_shr( L_sub( ONE_IN_Q30, gamma_fx ), Q15 ), &exp_gamma ); // Q15-exp_gamma
                    /* Keep the ratio gamma / (1 - gamma) in its own variable: both square roots below must
                       be taken from it. Overwriting gamma_fx with the first root (as done previously) made
                       the second root operate on sqrt(ratio + 1) instead of ratio. */
                    /* NOTE(review): for gamma close to 0.9 the ratio approaches 9, which does not fit Q30;
                       L_shl()/L_add() presumably saturate here - confirm and add headroom if required. */
                    Word32 ratio_fx = L_shl( Mpy_32_16_1( gamma_fx, divisor1 ), exp_gamma ); // Q30
                    Word16 exp_gamma1 = Q1, exp_gamma2 = Q1, exp_gamma3 = Q1;               /* input exponents: Q30 <=> exponent 1 */
                    Word32 sqrt_sum_fx = Sqrt32( L_add( ratio_fx, ONE_IN_Q30 ), &exp_gamma1 ); // Q31-exp_gamma1
                    Word32 temp = Sqrt32( ratio_fx, &exp_gamma2 );                             // Q31-exp_gamma2
                    /* align sqrt(ratio) to the exponent of sqrt(ratio + 1) before subtracting */
                    gamma_fx = L_sub( sqrt_sum_fx, L_shl( temp, exp_gamma2 - exp_gamma1 ) ); // Q31-exp_gamma1
                    gamma_fx = L_shl( gamma_fx, exp_gamma1 - Q1 );                           // Q30
                    Word32 divisor2 = Sqrt32( L_add( ONE_IN_Q30, L_shl( Mpy_32_32( gamma_fx, gamma_fx ), Q1 ) ), &exp_gamma3 ); // Q31 - exp_gamma3
                    scale_fx = L_shl( divide3232( ONE_IN_Q30, divisor2 ), Q15 + exp_gamma3 );                                   // Q30
                }
                ELSE
                {
                    gamma_fx = 0;
                }

                /* first half frame: accumulate onto the overlap contribution already in Np/Ns */
                FOR( i = 0; i < 2 * hFdCngCom->frameSize / 4; i++ )
                {
                    // Np[i] += scale * (N1[i] + gamma * N2[i]);
                    // Ns[i] += scale * sign(hStereoCng->c_PS_LT) * (N1[i] - gamma * N2[i]);
                    Np_fx[i] = L_add( Np_fx[i],
                                      Mpy_32_32( scale_fx, L_shl( L_add( N1_fx[i], Mpy_32_32( gamma_fx, L_shl( N2_fx[i], Q1 ) ) ), Q1 ) ) ); // Q6
                    Word32 add2 = Mpy_32_32( scale_fx, L_shl( L_sub( N1_fx[i], Mpy_32_32( gamma_fx, L_shl( N2_fx[i], Q1 ) ) ), Q1 ) );       // Q6
                    if ( hStereoCng->c_PS_LT < 0.0f )
                    {
                        add2 = L_negate( add2 );
                    }
                    Ns_fx[i] = L_add( Ns_fx[i], add2 );
                }
                /* second half frame: no overlap contribution, plain assignment */
                FOR( ; i < hFdCngCom->frameSize; i++ )
                {
                    // Np[i] = scale * (N1[i] + gamma * N2[i]);
                    // Ns[i] = scale * sign(hStereoCng->c_PS_LT) * (N1[i] - gamma * N2[i]);
                    Np_fx[i] = Mpy_32_32( scale_fx, L_shl( L_add( N1_fx[i], Mpy_32_32( gamma_fx, L_shl( N2_fx[i], Q1 ) ) ), Q1 ) ); // Q6
                    Ns_fx[i] = Mpy_32_32( scale_fx, L_shl( L_sub( N1_fx[i], Mpy_32_32( gamma_fx, L_shl( N2_fx[i], Q1 ) ) ), Q1 ) ); // Q6
                    IF( hStereoCng->c_PS_LT < 0.0f )
                    {
                        Ns_fx[i] = L_negate( Ns_fx[i] );
                    }
                }
                /* Below code to be converted */
                float gamma = fix_to_float( gamma_fx, Q30 );
                float scale = fix_to_float( scale_fx, Q30 );
                scale *= (float) ( hFdCngCom->fftlen / 2 );
                // scale_fx = L_shr(scale_fx, Q1) * hFdCngCom->fftlen;
                for ( i = 0; i < hFdCngCom->frameSize / 2; i++ )
                {
                    hFdCngCom->olapBufferSynth2_flt[i] = scale * ( hFdCngCom->olapBufferSynth2_flt[i + 5 * hFdCngCom->frameSize / 4] + gamma * hStereoCng->olapBufferSynth22[i + 5 * hFdCngCom->frameSize / 4] );
                    hStereoCng->olapBufferSynth22[i] = sign( hStereoCng->c_PS_LT ) * scale * ( hFdCngCom->olapBufferSynth2_flt[i + 5 * hFdCngCom->frameSize / 4] - gamma * hStereoCng->olapBufferSynth22[i + 5 * hFdCngCom->frameSize / 4] );
                }
            }
            else
            {
                FOR( i = 0; i < hFdCngCom->frameSize / 2; i++ )
                {
                    // Np[i] += N1[i];
                    Np_fx[i] = L_add( Np_fx[i], N1_fx[i] ); // Q6
                }
                // mvr2r(&N1[hFdCngCom->frameSize / 2], &Np[hFdCngCom->frameSize / 2], hFdCngCom->frameSize / 2);
                mvl2l( &N1_fx[hFdCngCom->frameSize / 2], &Np_fx[hFdCngCom->frameSize / 2], hFdCngCom->frameSize / 2 );
                float scale = (float) ( hFdCngCom->fftlen / 2 );
                for ( i = 0; i < hFdCngCom->frameSize; i++ )
                {
                    hFdCngCom->olapBufferSynth2_flt[i] = scale * hFdCngCom->olapBufferSynth2_flt[i + 5 * hFdCngCom->frameSize / 4];
                }
            }
        }
        else
        {
            /* fade out only: no new noise, clear the overlap of both channels */
            set_f( hFdCngCom->olapBufferSynth2_flt, 0.0f, hFdCngCom->frameSize / 2 );
            set_f( hStereoCng->olapBufferSynth22, 0.0f, hFdCngCom->frameSize / 2 );
        }
        if ( flag_sec_CNA )
        {
            // mvr2r(Ns, hStereoCng->maskingNoiseS, hFdCngCom->frameSize);
            for ( i = 0; i < hFdCngCom->frameSize; i++ )
            {
                hStereoCng->maskingNoiseS[i] = fix_to_float( Ns_fx[i], Q6 );
            }
            hStereoCng->enableSecCNA = 1;
        }
        else
        {
            set_f( hStereoCng->olapBufferSynth22, 0.0f, hFdCngCom->frameSize );
        }

        /* add masking noise */
        // v_add(Np, syn, syn, hFdCngCom->frameSize);
        for ( i = 0; i < hFdCngCom->frameSize; i++ )
        {
            syn[i] = syn[i] + fix_to_float( Np_fx[i], Q6 );
        }
    }
    else if ( hStereoCng->enableSecCNA )
    {
        float SP_ratio = hStereoTD->SP_ratio_LT; /* Use long-term SP ratio based on L/R synthesis */
        /* scale and add masking noise */
        /* first quarter frame: linear cross-fade from the previous SP ratio to the current one */
        for ( i = 0; i < *hStereoCng->frameSize / 4; i++ )
        {
            float scale = ( ( hStereoTD->prevSP_ratio * ( *hStereoCng->frameSize / 4 - (float) i ) + SP_ratio * (float) i ) / ( *hStereoCng->frameSize / 4 ) );
            syn[i] += scale * hStereoCng->maskingNoiseS[i];
        }
        for ( ; i < *hStereoCng->frameSize / 2; i++ )
        {
            syn[i] += SP_ratio * hStereoCng->maskingNoiseS[i];
        }
        for ( ; i < *hStereoCng->frameSize; i++ )
        {
            syn[i] += SP_ratio * hStereoCng->maskingNoiseS[i];
        }
        hStereoTD->prevSP_ratio = SP_ratio;
    }

    return;
}
#endif
fade out */ float maskingNoiseS[L_FRAME16k]; /* masking noise (CNA) for secondary channel */ int16_t enableSecCNA; /* flag enabling secondary channel CNA */ diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c index b74860a9d..ecabe9a0b 100644 --- a/lib_dec/ivas_stereo_cng_dec.c +++ b/lib_dec/ivas_stereo_cng_dec.c @@ -599,6 +599,10 @@ static void stereo_dft_generate_comfort_noise( st->lp_noise_float = st->hFdCngDec->lp_noise_float; st->hFdCngDec->hFdCngCom->flag_noisy_speech = ( st->hFdCngDec->lp_speech_float - st->hFdCngDec->lp_noise_float ) < 28.f; st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt + 0.01f * st->hFdCngDec->hFdCngCom->flag_noisy_speech; +#ifdef IVAS_FLOAT_FIXED + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), + st->hFdCngDec->hFdCngCom->flag_noisy_speech * Q31_0_01); +#endif } if ( chan == 0 && st->core_brate <= SID_2k40 ) diff --git a/lib_dec/ivas_stereo_mdct_core_dec.c b/lib_dec/ivas_stereo_mdct_core_dec.c index fb72af1d6..52f621b61 100644 --- a/lib_dec/ivas_stereo_mdct_core_dec.c +++ b/lib_dec/ivas_stereo_mdct_core_dec.c @@ -705,7 +705,10 @@ static void run_min_stats( noisy_speech_detection_flt( st->hFdCngDec, st->VAD && st->m_frame_type == ACTIVE_FRAME, power_spec ); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech; - +#ifdef IVAS_FLOAT_FIXED + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), + st->hFdCngDec->hFdCngCom->flag_noisy_speech * Q31_0_01); +#endif st->lp_noise_float = st->hFdCngDec->lp_noise_float; } diff --git a/lib_dec/ivas_tcx_core_dec.c b/lib_dec/ivas_tcx_core_dec.c index 510656a16..56668cd46 100644 --- 
a/lib_dec/ivas_tcx_core_dec.c +++ b/lib_dec/ivas_tcx_core_dec.c @@ -732,7 +732,10 @@ void stereo_tcx_core_dec( noisy_speech_detection_flt( st->hFdCngDec, st->VAD && st->m_frame_type == ACTIVE_FRAME, signal_out ); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_flt + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech; - +#ifdef IVAS_FLOAT_FIXED + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), + st->hFdCngDec->hFdCngCom->flag_noisy_speech * Q31_0_01); +#endif st->lp_noise_float = st->hFdCngDec->lp_noise_float; if ( st->element_mode != IVAS_CPE_TD ) @@ -774,7 +777,11 @@ void stereo_tcx_core_dec( expand_range_flt( st->hFdCngDec->msPsd_float, psd_part, st->hFdCngDec->nFFTpart_shaping ); #endif scalebands_flt( psd_part, st->hFdCngDec->part_shaping, st->hFdCngDec->nFFTpart_shaping, st->hFdCngDec->midband_shaping, st->hFdCngDec->nFFTpart_shaping, st->hFdCngDec->hFdCngCom->stopFFTbin - st->hFdCngDec->hFdCngCom->startBand, psd, 1 ); +#ifndef IVAS_FLOAT_FIXED generate_stereo_masking_noise( signal_out, st, hStereoTD, flag_sec_CNA, 0, hStereoCng, nchan_out ); +#else + generate_stereo_masking_noise_fx( signal_out, st, hStereoTD, flag_sec_CNA, 0, hStereoCng, nchan_out ); +#endif } else if ( st->element_mode != IVAS_CPE_DFT ) { -- GitLab