diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index cc03a87b3..4ae64847d 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1357,46 +1357,47 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_block_error/; - add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp/; - add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_32x32/; - add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b/; - add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_32x32/; if (vpx_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_64x64/; - add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_64x64/; } + } else { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; - add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; if (vpx_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_64x64/; - add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_64x64/; } } @@ -2112,23 +2113,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd"; specialize qw/vp9_highbd_subtract_block/; - add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp/; - add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp_32x32/; - add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp9_highbd_quantize_b/; + add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_highbd_quantize_b sse2/; - add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp9_highbd_quantize_b_32x32/; + add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp9_highbd_quantize_b_32x32 sse2/; if (vpx_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void vp9_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp_64x64/; - add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_b_64x64/; } diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c index 8c13d0da6..9cf1e5e2c 100644 --- a/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -26,13 +26,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, + uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)scan; if (!skip_block) { diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 5194c4c27..c5d30b3c1 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -40,8 +40,6 @@ struct macroblock_plane { int16_t *round; int64_t quant_thred[2]; - // Zbin Over Quant value - int16_t zbin_extra; }; /* The [2] dimension is for whether we skip the EOB node (i.e. if previous diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8be5ed31f..4278336d3 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -4766,8 +4766,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - vp9_update_zbin_extra(x); - if (!is_inter_block(mbmi)) { int plane; mbmi->skip = 1; @@ -4979,8 +4977,6 @@ static void predict_superblock(VP9_COMP *cpi, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - vp9_update_zbin_extra(x); - for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); @@ -5011,8 +5007,6 @@ static void predict_superblock_sub8x8_extend(VP9_COMP *cpi, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - vp9_update_zbin_extra(x); - for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index af4729711..341dedda4 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -634,7 +634,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift); vp9_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -642,28 +642,28 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift); vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift); vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift); vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift); vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -683,7 +683,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_highbd_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, - p->zbin_extra, eob, scan_order->scan, + eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -692,7 +692,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, - p->zbin_extra, eob, scan_order->scan, + eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: @@ -703,7 +703,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: @@ -714,7 +714,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: @@ -725,7 +725,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -741,7 +741,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_fdct64x64(src_diff, coeff, diff_stride); vp9_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -749,7 +749,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: @@ -760,7 +760,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: @@ -771,7 +771,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: @@ -782,7 +782,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -987,7 +987,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift); vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -995,28 +995,28 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -1035,7 +1035,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, vp9_highbd_fdct64x64(src_diff, coeff, diff_stride); vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, p->zbin_extra, eob, + dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -1043,7 +1043,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, p->zbin_extra, eob, + dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: @@ -1054,7 +1054,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: @@ -1065,7 +1065,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: @@ -1076,7 +1076,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -1092,7 +1092,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, vp9_fdct64x64(src_diff, coeff, diff_stride); vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; #endif // CONFIG_TX64X64 @@ -1100,7 +1100,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: @@ -1111,7 +1111,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: @@ -1122,7 +1122,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: @@ -1133,7 +1133,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, #endif vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: @@ -1462,7 +1462,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride, diff_stride, bs, shift); vp9_quantize_rect(coeff + i, bs, 1, p->zbin, p->round, p->quant, p->quant_shift, qcoeff + i, dqcoeff + i, - pd->dequant, p->zbin_extra, logsizeby32, bs, i == 0); + pd->dequant, logsizeby32, bs, i == 0); vp9_tx_identity_add_rect(dqcoeff + i, dst + i, bs, 1, bs, dst_stride, shift); if ( i < bs - 1 && 1) @@ -1482,7 +1482,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride, diff_stride, bs, shift); vp9_quantize_rect(coeff + bs * i, 1, bs, p->zbin, p->round, p->quant, p->quant_shift, qcoeff + bs * i, dqcoeff + bs * i, - pd->dequant, p->zbin_extra, logsizeby32, bs, i == 0); + pd->dequant, logsizeby32, bs, i == 0); vp9_tx_identity_add_rect(dqcoeff + bs * i, dst + dst_stride * i, 1, bs, bs, dst_stride, shift); if (i < bs - 1) @@ -1496,7 +1496,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride, vp9_tx_identity_rect(src_diff, coeff, 1, bs, diff_stride, bs, shift); vp9_quantize_rect(coeff, 1, bs, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, - p->zbin_extra, logsizeby32, bs, 1); + logsizeby32, bs, 1); vp9_tx_identity_add_rect(dqcoeff, dst, 1, bs, bs, dst_stride, shift); vp9_subtract_block_c(bs -1, 1, src_diff + diff_stride, diff_stride, @@ -1506,7 +1506,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride, diff_stride, bs, shift); vp9_quantize_rect(coeff + bs, bs - 1, 1, p->zbin, p->round, p->quant, p->quant_shift, qcoeff + bs, dqcoeff + bs, - pd->dequant, p->zbin_extra, logsizeby32, bs, 0); + pd->dequant, logsizeby32, bs, 0); vp9_tx_identity_add_rect(dqcoeff + bs, dst + dst_stride, bs - 1, 1, bs, dst_stride, shift); @@ -1526,7 +1526,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride, vp9_quantize_rect(coeff + bs * i + j, 1, 1, p->zbin, p->round, p->quant, p->quant_shift, qcoeff + bs * i + j, dqcoeff + bs * i + j, pd->dequant, - p->zbin_extra, logsizeby32, bs, 0); + logsizeby32, bs, 0); vp9_tx_identity_add_rect(dqcoeff + bs * i + j, dst + dst_stride * i + j, 1, 1, bs, dst_stride, shift); @@ -1600,7 +1600,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift); vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, p->zbin_extra, eob, + dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -1633,7 +1633,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, p->zbin_extra, eob, + dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1665,7 +1665,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1697,7 +1697,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1731,7 +1731,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } @@ -1769,8 +1769,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_highbd_fdct64x64(src_diff, coeff, diff_stride); vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, - p->zbin_extra, eob, + qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); if (!x->skip_encode && *eob) { vp9_highbd_idct64x64_add(dqcoeff, dst, dst_stride, *eob, xd->bd); @@ -1794,8 +1793,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, - p->zbin_extra, eob, + qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1819,7 +1817,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1844,7 +1842,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -1873,7 +1871,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } @@ -1922,7 +1920,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_fdct64x64(src_diff, coeff, diff_stride); vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -1945,7 +1943,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -1968,7 +1966,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -1991,7 +1989,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -2018,7 +2016,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, p->zbin_extra, eob, scan_order->scan, + pd->dequant, eob, scan_order->scan, scan_order->iscan); } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 7bd5b3cfa..035e5c117 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -172,14 +172,13 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, + uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -218,7 +217,6 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -228,7 +226,6 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); @@ -270,7 +267,6 @@ static INLINE void quantize_fp_bigtx(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, @@ -278,7 +274,6 @@ static INLINE void quantize_fp_bigtx(const tran_low_t *coeff_ptr, int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -318,14 +313,13 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 0); + eob_ptr, scan, iscan, 0); } #if CONFIG_TX64X64 @@ -339,14 +333,13 @@ void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 1); + eob_ptr, scan, iscan, 1); } #endif // CONFIG_TX64X64 @@ -361,7 +354,6 @@ static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, @@ -369,7 +361,6 @@ static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr, int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -409,14 +400,13 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 0); + eob_ptr, scan, iscan, 0); } #if CONFIG_TX64X64 @@ -430,14 +420,13 @@ void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 1); + eob_ptr, scan, iscan, 1); } #endif // CONFIG_TX64X64 #endif // CONFIG_VP9_HIGHBITDEPTH @@ -448,13 +437,11 @@ void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, + uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = { zbin_ptr[0] + zbin_oq_value, - zbin_ptr[1] + zbin_oq_value }; - const int nzbins[2] = { zbins[0] * -1, - zbins[1] * -1 }; + const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]}; + const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -501,17 +488,17 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, int zbin_oq_value, + const int16_t *dequant_ptr, int logsizeby32, int stride, int has_dc) { int r, c; - int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, + int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1 + (logsizeby32 < 0 ? -1 : logsizeby32)), - ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, + ROUND_POWER_OF_TWO(zbin_ptr[1], 1 + (logsizeby32 < 0 ? -1 : logsizeby32))}; if (logsizeby32 < 0) { logsizeby32 = -1; - zbins[0] = zbin_ptr[0] + zbin_oq_value; - zbins[1] = zbin_ptr[1] + zbin_oq_value; + zbins[0] = zbin_ptr[0]; + zbins[1] = zbin_ptr[1]; } for (r = 0; r < row; r++) @@ -562,14 +549,12 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, int zbin_oq_value, + const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = { zbin_ptr[0] + zbin_oq_value, - zbin_ptr[1] + zbin_oq_value }; - const int nzbins[2] = { zbins[0] * -1, - zbins[1] * -1 }; + const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]}; + const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -622,13 +607,12 @@ static INLINE void quantize_b_bigtx(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int logsizeby32) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1), - ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) }; + const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1), + ROUND_POWER_OF_TWO(zbin_ptr[1], 1)}; const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; int idx = 0; @@ -685,14 +669,13 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 0); + eob_ptr, scan, iscan, 0); } #if CONFIG_TX64X64 @@ -706,14 +689,13 @@ void vp9_quantize_b_64x64_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 1); + eob_ptr, scan, iscan, 1); } #endif // CONFIG_TX64X64 @@ -728,14 +710,13 @@ static INLINE void highbd_quantize_b_bigtx(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int logsizeby32) { - const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1), - ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) }; - const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1), + ROUND_POWER_OF_TWO(zbin_ptr[1], 1)}; + const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; int idx = 0; int idx_arr[MAX_NUM_COEFS]; @@ -791,14 +772,13 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 0); + eob_ptr, scan, iscan, 0); } #if CONFIG_TX64X64 @@ -812,14 +792,13 @@ void vp9_highbd_quantize_b_64x64_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - zbin_oq_value, eob_ptr, scan, iscan, 1); + eob_ptr, scan, iscan, 1); } #endif // CONFIG_TX64X64 #endif // CONFIG_VP9_HIGHBITDEPTH @@ -837,7 +816,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->zbin, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, p->zbin_extra, &p->eobs[block], + pd->dequant, &p->eobs[block], scan, iscan); return; } @@ -847,7 +826,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->zbin, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan); + pd->dequant, &p->eobs[block], scan, iscan); } static void invert_quant(int16_t *quant, int16_t *shift, int d) { @@ -944,8 +923,6 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const int segment_id = xd->mi[0].src_mi->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); - // TODO(paulwilkins): 0 value for zbin for now pending follow on patch. - const int zbin = 0; int i; // Y @@ -955,13 +932,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; - x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; - x->plane[0].quant_thred[0] = (x->plane[0].zbin[0] + x->plane[0].zbin_extra) * - (x->plane[0].zbin[0] + x->plane[0].zbin_extra); - x->plane[0].quant_thred[1] = (x->plane[0].zbin[1] + x->plane[0].zbin_extra) * - (x->plane[0].zbin[1] + x->plane[0].zbin_extra); + x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; + x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1]; // UV for (i = 1; i < 3; i++) { @@ -971,15 +945,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; - x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; - x->plane[i].quant_thred[0] = - (x->plane[i].zbin[0] + x->plane[i].zbin_extra) * - (x->plane[i].zbin[0] + x->plane[i].zbin_extra); - x->plane[i].quant_thred[1] = - (x->plane[i].zbin[1] + x->plane[i].zbin_extra) * - (x->plane[i].zbin[1] + x->plane[i].zbin_extra); + x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; + x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; } x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); @@ -991,15 +960,6 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { vp9_initialize_me_consts(cpi, x->q_index); } -void vp9_update_zbin_extra(MACROBLOCK *x) { - const int y_zbin_extra = 0; - const int uv_zbin_extra = 0; - - x->plane[0].zbin_extra = (int16_t)y_zbin_extra; - x->plane[1].zbin_extra = (int16_t)uv_zbin_extra; - x->plane[2].zbin_extra = (int16_t)uv_zbin_extra; -} - void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->mb); } diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 8abe3f348..fad9c7d6e 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -84,8 +84,6 @@ struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); -void vp9_update_zbin_extra(MACROBLOCK *x); - void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); @@ -101,7 +99,7 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, int zbin_oq_value, + const int16_t *dequant_ptr, int logsizeby32, int stride, int has_dc); int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan); diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c new file mode 100644 index 000000000..0bce9c321 --- /dev/null +++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_common.h" + +#if CONFIG_VP9_HIGHBITDEPTH +// from vp9_idct.h: typedef int32_t tran_low_t; +void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, + intptr_t count, + int skip_block, + const int16_t *zbin_ptr, + const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const int16_t *iscan) { + int i, j, non_zero_regs = (int)count / 4, eob_i = -1; + __m128i zbins[2]; + __m128i nzbins[2]; + + zbins[0] = _mm_set_epi32((int)zbin_ptr[1], + (int)zbin_ptr[1], + (int)zbin_ptr[1], + (int)zbin_ptr[0]); + zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]); + + nzbins[0] = _mm_setzero_si128(); + nzbins[1] = _mm_setzero_si128(); + nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); + nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); + + (void)scan; + + vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = ((int)count / 4) - 1; i >= 0; i--) { + __m128i coeffs, cmp1, cmp2; + int test; + coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4)); + cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]); + cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]); + cmp1 = _mm_and_si128(cmp1, cmp2); + test = _mm_movemask_epi8(cmp1); + if (test == 0xffff) + non_zero_regs--; + else + break; + } + + // Quantization pass: + for (i = 0; i < non_zero_regs; i++) { + __m128i coeffs, coeffs_sign, tmp1, tmp2; + int test; + int abs_coeff[4]; + int coeff_sign[4]; + + coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4)); + coeffs_sign = _mm_srai_epi32(coeffs, 31); + coeffs = _mm_sub_epi32( + _mm_xor_si128(coeffs, coeffs_sign), coeffs_sign); + tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]); + tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]); + tmp1 = _mm_or_si128(tmp1, tmp2); + test = _mm_movemask_epi8(tmp1); + _mm_storeu_si128((__m128i*)abs_coeff, coeffs); + _mm_storeu_si128((__m128i*)coeff_sign, coeffs_sign); + + for (j = 0; j < 4; j++) { + if (test & (1 << (4 * j))) { + int k = 4 * i + j; + int64_t tmp = clamp(abs_coeff[j] + round_ptr[k != 0], + INT32_MIN, INT32_MAX); + tmp = ((((tmp * quant_ptr[k != 0]) >> 16) + tmp) * + quant_shift_ptr[k != 0]) >> 16; // quantization + qcoeff_ptr[k] = (tmp ^ coeff_sign[j]) - coeff_sign[j]; + dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0]; + if (tmp) + eob_i = iscan[k] > eob_i ? iscan[k] : eob_i; + } + } + } + } + *eob_ptr = eob_i + 1; +} + + +void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *zbin_ptr, + const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const int16_t *iscan) { + __m128i zbins[2]; + __m128i nzbins[2]; + int idx = 0; + int idx_arr[1024]; + int i, eob = -1; + const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); + const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); + (void)scan; + zbins[0] = _mm_set_epi32(zbin1_tmp, + zbin1_tmp, + zbin1_tmp, + zbin0_tmp); + zbins[1] = _mm_set1_epi32(zbin1_tmp); + + nzbins[0] = _mm_setzero_si128(); + nzbins[1] = _mm_setzero_si128(); + nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); + nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); + + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs / 4; i++) { + __m128i coeffs, cmp1, cmp2; + int test; + coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4)); + cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]); + cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]); + cmp1 = _mm_and_si128(cmp1, cmp2); + test = _mm_movemask_epi8(cmp1); + if (!(test & 0xf)) + idx_arr[idx++] = i * 4; + if (!(test & 0xf0)) + idx_arr[idx++] = i * 4 + 1; + if (!(test & 0xf00)) + idx_arr[idx++] = i * 4 + 2; + if (!(test & 0xf000)) + idx_arr[idx++] = i * 4 + 3; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. + for (i = 0; i < idx; i++) { + const int rc = idx_arr[i]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t tmp = clamp(abs_coeff + + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), + INT32_MIN, INT32_MAX); + tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * + quant_shift_ptr[rc != 0]) >> 15; + + qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; + + if (tmp) + eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; + } + } + *eob_ptr = eob + 1; +} +#endif diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c index 7c1c8843c..679c66e30 100644 --- a/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/vp9/encoder/x86/vp9_quantize_sse2.c @@ -18,7 +18,7 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - int zbin_oq_value, uint16_t* eob_ptr, + uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -39,13 +39,10 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, // Setup global values { - __m128i zbin_oq; __m128i pw_1; - zbin_oq = _mm_set1_epi16(zbin_oq_value); zbin = _mm_load_si128((const __m128i*)zbin_ptr); round = _mm_load_si128((const __m128i*)round_ptr); quant = _mm_load_si128((const __m128i*)quant_ptr); - zbin = _mm_add_epi16(zbin, zbin_oq); pw_1 = _mm_set1_epi16(1); zbin = _mm_sub_epi16(zbin, pw_1); dequant = _mm_load_si128((const __m128i*)dequant_ptr); @@ -223,3 +220,184 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, *eob_ptr = 0; } } + +void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t* zbin_ptr, + const int16_t* round_ptr, const int16_t* quant_ptr, + const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, + int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, + uint16_t* eob_ptr, + const int16_t* scan_ptr, + const int16_t* iscan_ptr) { + __m128i zero; + (void)scan_ptr; + (void)zbin_ptr; + (void)quant_shift_ptr; + + coeff_ptr += n_coeffs; + iscan_ptr += n_coeffs; + qcoeff_ptr += n_coeffs; + dqcoeff_ptr += n_coeffs; + n_coeffs = -n_coeffs; + zero = _mm_setzero_si128(); + + if (!skip_block) { + __m128i eob; + __m128i round, quant, dequant; + { + __m128i coeff0, coeff1; + + // Setup global values + { + round = _mm_load_si128((const __m128i*)round_ptr); + quant = _mm_load_si128((const __m128i*)quant_ptr); + dequant = _mm_load_si128((const __m128i*)dequant_ptr); + } + + { + __m128i coeff0_sign, coeff1_sign; + __m128i qcoeff0, qcoeff1; + __m128i qtmp0, qtmp1; + // Do DC and first 15 AC + coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs)); + coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1); + + // Poor man's sign extract + coeff0_sign = _mm_srai_epi16(coeff0, 15); + coeff1_sign = _mm_srai_epi16(coeff1, 15); + qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); + qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + + qcoeff0 = _mm_adds_epi16(qcoeff0, round); + round = _mm_unpackhi_epi64(round, round); + qcoeff1 = _mm_adds_epi16(qcoeff1, round); + qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); + quant = _mm_unpackhi_epi64(quant, quant); + qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); + + // Reinsert signs + qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); + qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + + coeff0 = _mm_mullo_epi16(qcoeff0, dequant); + dequant = _mm_unpackhi_epi64(dequant, dequant); + coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + } + + { + // Scan for eob + __m128i zero_coeff0, zero_coeff1; + __m128i nzero_coeff0, nzero_coeff1; + __m128i iscan0, iscan1; + __m128i eob1; + zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); + zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); + nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); + nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); + iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1); + // Add one to convert from indices to counts + iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); + iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); + eob = _mm_and_si128(iscan0, nzero_coeff0); + eob1 = _mm_and_si128(iscan1, nzero_coeff1); + eob = _mm_max_epi16(eob, eob1); + } + n_coeffs += 8 * 2; + } + + // AC only loop + while (n_coeffs < 0) { + __m128i coeff0, coeff1; + { + __m128i coeff0_sign, coeff1_sign; + __m128i qcoeff0, qcoeff1; + __m128i qtmp0, qtmp1; + + coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs)); + coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1); + + // Poor man's sign extract + coeff0_sign = _mm_srai_epi16(coeff0, 15); + coeff1_sign = _mm_srai_epi16(coeff1, 15); + qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); + qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + + qcoeff0 = _mm_adds_epi16(qcoeff0, round); + qcoeff1 = _mm_adds_epi16(qcoeff1, round); + qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); + qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); + + // Reinsert signs + qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); + qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); + qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); + qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); + + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + + coeff0 = _mm_mullo_epi16(qcoeff0, dequant); + coeff1 = _mm_mullo_epi16(qcoeff1, dequant); + + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + } + + { + // Scan for eob + __m128i zero_coeff0, zero_coeff1; + __m128i nzero_coeff0, nzero_coeff1; + __m128i iscan0, iscan1; + __m128i eob0, eob1; + zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); + zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); + nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); + nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); + iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1); + // Add one to convert from indices to counts + iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); + iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); + eob0 = _mm_and_si128(iscan0, nzero_coeff0); + eob1 = _mm_and_si128(iscan1, nzero_coeff1); + eob0 = _mm_max_epi16(eob0, eob1); + eob = _mm_max_epi16(eob, eob0); + } + n_coeffs += 8 * 2; + } + + // Accumulate EOB + { + __m128i eob_shuffled; + eob_shuffled = _mm_shuffle_epi32(eob, 0xe); + eob = _mm_max_epi16(eob, eob_shuffled); + eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); + eob = _mm_max_epi16(eob, eob_shuffled); + eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); + eob = _mm_max_epi16(eob, eob_shuffled); + *eob_ptr = _mm_extract_epi16(eob, 1); + } + } else { + do { + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); + _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); + n_coeffs += 8 * 2; + } while (n_coeffs < 0); + *eob_ptr = 0; + } +} diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index 508e1d4f5..5d64e8444 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm @@ -17,7 +17,7 @@ SECTION .text %macro QUANTIZE_FN 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ + shift, qcoeff, dqcoeff, dequant, \ eob, scan, iscan cmp dword skipm, 0 jne .blank @@ -29,13 +29,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ movifnidn zbinq, zbinmp movifnidn roundq, roundmp movifnidn quantq, quantmp - movd m4, dword zbin_oqm ; m4 = zbin_oq mova m0, [zbinq] ; m0 = zbin - punpcklwd m4, m4 mova m1, [roundq] ; m1 = round - pshufd m4, m4, 0 mova m2, [quantq] ; m2 = quant - paddw m0, m4 ; m0 = zbin + zbin_oq %ifidn %1, b_32x32 pcmpeqw m5, m5 psrlw m5, 15 @@ -55,7 +51,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m4, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob lea coeffq, [ coeffq+ncoeffq*2] lea iscanq, [ iscanq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] @@ -220,7 +216,7 @@ QUANTIZE_FN b_32x32, 7 %macro QUANTIZE_FP 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ + shift, qcoeff, dqcoeff, dequant, \ eob, scan, iscan cmp dword skipm, 0 jne .blank @@ -248,7 +244,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m2, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob lea coeffq, [ coeffq+ncoeffq*2] lea iscanq, [ iscanq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index e72cb0024..0fdd89d61 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -102,6 +102,9 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c +endif ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm