diff --git a/lib_com/interpol_fx.c b/lib_com/interpol_fx.c index a490a755269c9fa7d3f9bdeb440b60bf7f483253..030d417359a34f3d61c7948d4cbde3c2fad9b949 100644 --- a/lib_com/interpol_fx.c +++ b/lib_com/interpol_fx.c @@ -70,9 +70,14 @@ Word32 Interpol_lc_fx( /* o : interpolated value c2 += up_samp; /* move16() not needed, since the coefficient can be rearrange in bit exact way */ c1 += up_samp; } +#ifdef OPT_SBA_ENC_V2_BE + L_sum = W_shl_sat_l( L_sum64, 1 ); /*Q15*/ + } +#else L_sum = W_sat_l( L_sum64 ); /*Q14*/ } L_sum = L_shl_sat( L_sum, 1 ); /*Q15*/ +#endif return L_sum; } diff --git a/lib_com/ivas_spar_com_fx.c b/lib_com/ivas_spar_com_fx.c index ef549a2ae42ad2dfa3389548201920751d2a74e4..15a529149f6b89f7d806ace9bdcf031c32930d20 100644 --- a/lib_com/ivas_spar_com_fx.c +++ b/lib_com/ivas_spar_com_fx.c @@ -2361,18 +2361,30 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( Word32 re1, re2; W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], cov_dd_re[0][0] ); /*q_cov_dd_re+ pSparMd->band_coeffs[b_ts_idx].q_C_re_fx*/ +#ifdef OPT_SBA_ENC_V2_BE + q_tmp1 = sub( W_norm( W_tmp ), 32 ); + re1 = W_shl_sat_l( W_tmp, q_tmp1 ); /*q_cov_dd_re+ q_C_re+q_tmp1*/ + q_tmp1 = add( add( q_C_re, q_tmp1 ), q_cov_dd_re ); +#else q_tmp1 = W_norm( W_tmp ); re1 = W_extract_h( W_shl( W_tmp, q_tmp1 ) ); /*q_cov_dd_re+ q_C_re+q_tmp1-32*/ q_tmp1 = sub( add( add( q_C_re, q_tmp1 ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp1 = 31; move16(); } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], cov_dd_re[0][0] ); /*q_cov_dd_re+ q_C_re*/ +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 32 ); + re2 = W_shl_sat_l( W_tmp, q_tmp ); /*q_cov_dd_re+ q_C_re+q_tmp*/ + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = W_norm( W_tmp ); re2 = W_extract_h( W_shl( W_tmp, q_tmp ) ); /*q_cov_dd_re+ q_C_re+q_tmp-32*/ q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2380,12 +2392,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re1 ); // q_tmp1+q_C_re +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[0][0] = W_shl_sat_l( W_tmp, q_factor ); // q_tmp1+q_C_re+q_recon_uu_re[0][0] + move32(); + q_recon_uu_re[0][0] = add( add( q_C_re, q_factor ), q_tmp1 ); + move16(); +#else q_recon_uu_re[0][0] = W_norm( W_tmp ); move16(); recon_uu_re[0][0] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[0][0] ) ); // q_tmp1+q_C_re+q_recon_uu_re[0][0]-32 move32(); q_recon_uu_re[0][0] = sub( add( add( q_C_re, q_recon_uu_re[0][0] ), q_tmp1 ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[0][0] = 31; @@ -2393,12 +2413,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], re1 ); // q_C_re+q_tmp1 +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[0][1] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp1+q_recon_uu_re[0][1] + move32(); + q_recon_uu_re[0][1] = add( add( q_C_re, q_factor ), q_tmp1 ); + move16(); +#else q_recon_uu_re[0][1] = W_norm( W_tmp ); move16(); recon_uu_re[0][1] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[0][1] ) ); // q_C_re+q_tmp1+q_recon_uu_re[0][1]-32 move32(); q_recon_uu_re[0][1] = sub( add( add( q_C_re, q_recon_uu_re[0][1] ), q_tmp1 ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[0][1] = 31; @@ -2406,12 +2434,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re2 ); // q_C_re+q_tmp +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[1][0] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp+q_recon_uu_re[1][0] + move32(); + q_recon_uu_re[1][0] = add( add( q_C_re, q_factor ), q_tmp ); + move16(); +#else q_recon_uu_re[1][0] = W_norm( W_tmp ); move16(); recon_uu_re[1][0] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[1][0] ) ); // q_C_re+q_tmp+q_recon_uu_re[1][0]-32 move32(); q_recon_uu_re[1][0] = sub( add( add( q_C_re, q_recon_uu_re[1][0] ), q_tmp ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[1][0] = 31; @@ -2419,12 +2455,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], re2 ); // q_C_re+q_tmp +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[1][1] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp+q_recon_uu_re[1][1] + move32(); + q_recon_uu_re[1][1] = add( add( q_C_re, q_factor ), q_tmp ); + move16(); +#else q_recon_uu_re[1][1] = W_norm( W_tmp ); move16(); recon_uu_re[1][1] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[1][1] ) ); // q_C_re+q_tmp+q_recon_uu_re[1][1]-32 move32(); q_recon_uu_re[1][1] = sub( add( add( q_C_re, q_recon_uu_re[1][1] ), q_tmp ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[1][1] = 31; @@ -2441,12 +2485,18 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } } q_tmp = sub( s_min( q_tmp, q_cov_uu_re ), 1 ); - +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( q_cov_uu_re, q_tmp ); +#endif FOR( i = 0; i < 2; i++ ) { FOR( j = 0; j < 2; j++ ) { +#ifdef OPT_SBA_ENC_V2_BE + cov_uu_re[i][j] = L_sub( L_shr( cov_uu_re[i][j], q_factor ), L_shr( recon_uu_re[i][j], sub( q_recon_uu_re[i][j], q_tmp ) ) ); // q_tmp +#else cov_uu_re[i][j] = L_sub( L_shr( cov_uu_re[i][j], sub( q_cov_uu_re, q_tmp ) ), L_shr( recon_uu_re[i][j], sub( q_recon_uu_re[i][j], q_tmp ) ) ); // q_tmp +#endif move32(); } } @@ -2466,9 +2516,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( { Word32 re; W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][k], cov_dd_re[k][j] ); // q_C_re+q_cov_dd_re +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 33 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_cov_dd_re+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = sub( W_norm( W_tmp ), 1 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_cov_dd_re+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2492,9 +2548,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re1[0] ); // q_C_re+q_re1[0] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 33 ); + re2 = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_re1[0]+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_re1[0] ); +#else q_tmp = sub( W_norm( W_tmp ), 1 ); re2 = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_re1[0]+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_re1[0] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2504,9 +2566,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( move32(); W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][1], re1[1] ); // q_C_re+q_re1[1] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp1 = sub( W_norm( W_tmp ), 33 ); + re2 = W_shl_sat_l( W_tmp, q_tmp1 ); // q_C_re+q_re1[1]+q_tmp1 + q_tmp1 = add( add( q_C_re, q_tmp1 ), q_re1[1] ); +#else q_tmp1 = sub( W_norm( W_tmp ), 1 ); re2 = W_extract_h( W_shl( W_tmp, q_tmp1 ) ); // q_C_re+q_re1[1]+q_tmp1-32 q_tmp1 = sub( add( add( q_C_re, q_tmp1 ), q_re1[1] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp1 = 31; @@ -2585,9 +2653,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( FOR( k = 0; k < num_dmx - 1; k++ ) { W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[i][k], cov_dd_re[k][m] ); // q_C_re+q_cov_dd_re +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 34 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_cov_dd_re+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = sub( W_norm( W_tmp ), 2 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_cov_dd_re+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2627,9 +2701,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( FOR( m = 0; m < num_dmx - 1; m++ ) { W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[j][m], re1[m] ); // q_C_re+q_re1[m] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 34 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_re1[m]+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_re1[m] ); +#else q_tmp = sub( W_norm( W_tmp ), 2 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_re1[m]+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_re1[m] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2714,9 +2794,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( move16(); IF( trace != 0 ) { +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( trace ), 32 ); + tmp = Mpy_32_32( p_norm_scaling, W_shl_sat_l( trace, q_factor ) ); // q_cov_uu_re+q_factor + q_factor = add( q_cov_uu_re, q_factor ); +#else q_factor = W_norm( trace ); tmp = Mpy_32_32( p_norm_scaling, W_extract_h( W_shl( trace, q_factor ) ) ); // q_cov_uu_re+q_factor-32 q_factor = sub( add( q_cov_uu_re, q_factor ), 32 ); +#endif IF( GT_16( q_factor, q_postpred_cov_re ) ) { tmp = L_shr( tmp, sub( q_factor, q_postpred_cov_re ) ); // q_postpred_cov_re diff --git a/lib_com/options.h b/lib_com/options.h index ec5b35f4628187c27bea3f812638916323db1714..7996f8d4143b24c9e34caacb6cadd4f52efcfad8 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -80,6 +80,7 @@ #define OPT_SBA_REND_V1_BE #define OPT_HEAD_ROT_REND_V1_BE #define OPT_SBA_DEC_V2_BE +#define OPT_SBA_ENC_V2_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 diff --git a/lib_enc/ACcontextMapping_enc_fx.c b/lib_enc/ACcontextMapping_enc_fx.c index 5003712b7ceda9821bc022dd7e0275be48282386..a7944410b9f855c85dd7edb3d47b8b20e55e40cc 100644 --- a/lib_enc/ACcontextMapping_enc_fx.c +++ b/lib_enc/ACcontextMapping_enc_fx.c @@ -1249,6 +1249,11 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( k = 1; move16(); +#ifdef OPT_SBA_ENC_V2_BE + Word16 round_bit_estimate_fx; + Word32 target_Q15 = L_shl( target, Q15 ); // Q15 +#endif + WHILE( LT_16( k, nt / 2 ) ) { bit_estimate_fx = W_add( bit_estimate_fx, MAKE_NUMBER_QX( 1, Q23 ) ); @@ -1334,7 +1339,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( ctx = &c[L_or( p1, p2 )]; t = (UWord16) L_add( *ctx, rateFlag ); - IF( LT_16( nt_half, idx ) ) + if ( LT_16( nt_half, idx ) ) { t = add( t, ( 1 << NBITS_CONTEXT ) ); } @@ -1351,6 +1356,19 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) + { + pki = lookup[lev1]; /* ESC symbol */ + + bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); + bit_estimate_fx = W_add( bit_estimate_fx, MAKE_VARIABLE_QX( 2, Q23 ) ); + a1 = shr( a1, 1 ); + b1 = shr( b1, 1 ); + + lev1 = s_min( add( lev1, ( 1 << ( NBITS_CONTEXT + NBITS_RATEQ ) ) ), 2 << ( NBITS_CONTEXT + NBITS_RATEQ ) ); + } +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) { pki = lookup[lev1]; /* ESC symbol */ @@ -1364,14 +1382,18 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ } - +#endif pki = lookup[lev1]; symbol = add( a1, i_mult( A_THRES, b1 ) ); /* Q0 */ bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); +#ifdef OPT_SBA_ENC_V2_BE + IF( GT_32( W_shl_sat_l( bit_estimate_fx, -Q8 ), target_Q15 ) ) // Q15 +#else /* Should we truncate? */ IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) +#endif { stop2 = 1; move16(); @@ -1393,6 +1415,13 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { lev1 = shr( lev1, NBITS_CONTEXT + NBITS_RATEQ ); +#ifdef OPT_SBA_ENC_V2_BE + t = add( 13, lev1 ); + IF( lev1 <= 0 ) + { + t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); + } +#else IF( lev1 <= 0 ) { t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); @@ -1401,6 +1430,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { t = add( 13, lev1 ); } +#endif *ctx = L_add( imult3216( L_and( *ctx, 0xf ), 16 ), t ); move32(); @@ -1425,15 +1455,21 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( } } /*end of the 2-tuples loop*/ - - total_output_bits = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ - +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else + total_output_bits = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#endif IF( *stop ) { - total_output_bits = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( nbits2_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else + total_output_bits = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#endif } - IF( stop2 ) + if ( stop2 ) { stop2 = total_output_bits; /* Q0 */ move16(); @@ -1455,8 +1491,11 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( hm_cfg->numPeakIndices = numPeakIndicesOrig; /* Q0 */ move16(); - +#ifdef OPT_SBA_ENC_V2_BE + return round_fx( L_add( W_shl_sat_l( nbits2_fx, -Q7 ), ONE_IN_Q14 ) ); /* Q0 */ +#else return round_fx( L_add( W_extract_l( W_shr( nbits2_fx, Q7 ) ), ONE_IN_Q14 ) ); /* Q0 */ +#endif } ELSE /* if (!hm_cfg) */ { @@ -1530,6 +1569,21 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) + { + pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ + + bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); + bit_estimate_fx = W_add( bit_estimate_fx, MAKE_NUMBER_QX( 2, Q23 ) ); + + a1 = shr( a1, 1 ); + b1 = shr( b1, 1 ); + + lev1 = add( lev1, 1 ); + esc_nb = s_min( lev1, 3 ); + } +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) { pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ @@ -1546,15 +1600,18 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ } - +#endif pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ - move16(); symbol = add( a1, i_mult( A_THRES, b1 ) ); /* Q0 */ bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); /* Should we truncate? */ +#ifdef OPT_SBA_ENC_V2_BE + IF( GT_32( W_shl_sat_l( bit_estimate_fx, -Q8 ), target_Q15 ) ) // Q15 +#else IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) +#endif { overflow_flag = 1; move16(); @@ -1570,6 +1627,14 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( } } +#ifdef OPT_SBA_ENC_V2_BE + /* Update context for next 2-tuple */ + cp = add( 1, i_mult( add( a1, b1 ), add( esc_nb, 1 ) ) ); /* Q0 */ + if ( GE_16( esc_nb, 2 ) ) + { + cp = add( 12, esc_nb ); /* Q0 */ + } +#else /* Update context for next 2-tuple */ IF( LT_16( esc_nb, 2 ) ) { @@ -1579,59 +1644,94 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { cp = add( 12, esc_nb ); /* Q0 */ } +#endif /*shift old bits and replace last 4 bits*/ s = (UWord16) L_add( L_shl( s, 4 ), cp ); t = s_and( s, 0xFF ); - } /*end of the 2-tuples loop*/ +#ifdef OPT_SBA_ENC_V2_BE + tot_bits2 = round_fx( W_shl_sat_l( nbits2_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ + round_bit_estimate_fx = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else tot_bits2 = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ - IF( LT_16( lastnz2, lastnz ) ) /* Overflow occured because unable to code all tuples */ +#endif + if ( LT_16( lastnz2, lastnz ) ) /* Overflow occured because unable to code all tuples */ { overflow_flag = 1; move16(); } +#ifdef OPT_SBA_ENC_V2_BE + if ( EQ_16( mode, -1 ) ) + { + tot_bits2 = round_bit_estimate_fx; + move16(); + } +#else IF( EQ_16( mode, -1 ) ) { - tot_bits2 = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + tot_bits2 = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ + } +#endif +#ifdef OPT_SBA_ENC_V2_BE + if ( overflow_flag == 0 ) /* No overflow */ + { + *stop = 0; + move16(); } + IF( overflow_flag != 0 ) /* Overflow */ + { + IF( *stop ) + { + *stop = tot_bits2; /* Q0 */ + move16(); + } + ELSE + { + *stop = round_bit_estimate_fx; + move16(); + } + } +#else IF( overflow_flag == 0 ) /* No overflow */ { *stop = 0; move16(); } ELSE /* Overflow */ + { + IF( *stop ) { - IF( *stop ){ - *stop = tot_bits2; /* Q0 */ + *stop = tot_bits2; /* Q0 */ + move16(); + } + ELSE + { + *stop = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + move16(); + } + } +#endif + + *lastnz_out = lastnz; /* Q0 */ move16(); - } - ELSE - { - *stop = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + *nEncoded = lastnz2; /* Q0 */ move16(); - } -} + /* Safety mechanism to avoid overflow */ + test(); + IF( EQ_16( lastnz2, 2 ) && EQ_16( overflow_flag, 1 ) ) + { + FOR( k = 0; k < lastnz2; k++ ) + { + x[k] = 0; + move16(); + } + } -*lastnz_out = lastnz; /* Q0 */ -move16(); -*nEncoded = lastnz2; /* Q0 */ -move16(); -/* Safety mechanism to avoid overflow */ -test(); -IF( EQ_16( lastnz2, 2 ) && EQ_16( overflow_flag, 1 ) ) -{ - FOR( k = 0; k < lastnz2; k++ ) - { - x[k] = 0; - move16(); + return tot_bits2; } } -return tot_bits2; -} -} - /*-------------------------------------------------------------------* * RCcontextMapping_encode2_estimate_bandWise_start_fx() @@ -1743,6 +1843,15 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( /* Get context */ t = add( hContextMem->ctx, hContextMem->rateFlag ); /* Q0 */ +#ifdef OPT_SBA_ENC_V2_BE + tmp = ( 1 << NBITS_CONTEXT ); + move16(); + if ( GE_16( hContextMem->nt_half, idx ) ) + { + tmp = 0; + move16(); + } +#else IF( GE_16( hContextMem->nt_half, idx ) ) { tmp = 0; @@ -1751,6 +1860,7 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( { tmp = ( 1 << NBITS_CONTEXT ); } +#endif t = add( t, tmp ); /* Q0 */ @@ -1771,15 +1881,18 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( /* check while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) +#endif { pki = lookup[lev1]; /* Q0 */ - move16(); hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, MAKE_NUMBER_QX( 2, Q23 ) ); - move32(); - move32(); + move64(); + move64(); // hContextMem->bit_estimate = hContextMem->bit_estimate + ari_bit_estimate_s17_LC[pki][VAL_ESC]; @@ -1789,20 +1902,27 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( b1 = shr( b1, 1 ); lev1 = s_min( add( lev1, ( 1 << ( NBITS_CONTEXT + NBITS_RATEQ ) ) ), 2 << ( NBITS_CONTEXT + NBITS_RATEQ ) ); /* Q0 */ - /* check while condition */ + /* check while condition */ } pki = lookup[lev1]; /* Q0 */ - move16(); + symbol = add( a1, i_mult( A_THRES, b1 ) ); /* MSB symbol Q0*/ hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); - move32(); + move64(); // hContextMem->bit_estimate = hContextMem->bit_estimate + ari_bit_estimate_s17_LC[pki][symbol]; /* Update context */ lev1 = shr( lev1, NBITS_CONTEXT + NBITS_RATEQ ); +#ifdef OPT_SBA_ENC_V2_BE + t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); /* Q0 */ + if ( lev1 > 0 ) + { + t = add( 13, lev1 ); /* Q0 */ + } +#else IF( lev1 <= 0 ) { t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); /* Q0 */ @@ -1811,12 +1931,16 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( { t = add( 13, lev1 ); /* Q0 */ } - +#endif hContextMem->ctx = add( i_mult( s_and( hContextMem->ctx, 0xf ), 16 ), t ); /* Q0 */ move16(); - } /*end of the 2-tuples loop*/ + } /*end of the 2-tuples loop*/ +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( hContextMem->bit_estimate_fx, -Q7 ) ); /* Q0 */ +#else total_output_bits = round_fx( W_extract_l( W_shr( hContextMem->bit_estimate_fx, Q7 ) ) ); /* Q0 */ +#endif // total_output_bits = (Word16) ( hContextMem->bit_estimate + 0.5f );