diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 87e5d1c27..5391d125d 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -19,6 +19,7 @@ #include "vpx_scale/yv12config.h" #include "vp10/common/common_data.h" +#include "vp10/common/quant_common.h" #include "vp10/common/entropy.h" #include "vp10/common/entropymode.h" #include "vp10/common/mv.h" @@ -215,6 +216,10 @@ typedef struct { #if CONFIG_EXT_PARTITION_TYPES PARTITION_TYPE partition; #endif +#if CONFIG_NEW_QUANT + int dq_off_index; + int send_dq_bit; +#endif // CONFIG_NEW_QUANT } MB_MODE_INFO; typedef struct MODE_INFO { @@ -261,6 +266,9 @@ typedef struct macroblockd_plane { ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *left_context; int16_t seg_dequant[MAX_SEGMENTS][2]; +#if CONFIG_NEW_QUANT + dequant_val_type_nuq seg_dequant_nuq[MAX_SEGMENTS][COEF_BANDS]; +#endif uint8_t *color_index_map; // number of 4x4s in current block @@ -270,6 +278,9 @@ typedef struct macroblockd_plane { // encoder const int16_t *dequant; +#if CONFIG_NEW_QUANT + const dequant_val_type_nuq* dequant_val_nuq; +#endif // CONFIG_NEW_QUANT } MACROBLOCKD_PLANE; #define BLOCK_OFFSET(x, i) ((x) + (i) * 16) diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index cbfa8b627..3ea4f3aec 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -218,6 +218,10 @@ typedef struct VP10Common { int uv_ac_delta_q; int16_t y_dequant[MAX_SEGMENTS][2]; int16_t uv_dequant[MAX_SEGMENTS][2]; +#if CONFIG_NEW_QUANT + dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][COEF_BANDS]; + dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][COEF_BANDS]; +#endif /* We allocate a MODE_INFO struct for each macroblock, together with an extra row on top and column on the left to simplify prediction. */ @@ -429,14 +433,21 @@ static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) { static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd, tran_low_t *dqcoeff) { int i; - for (i = 0; i < MAX_MB_PLANE; ++i) { xd->plane[i].dqcoeff = dqcoeff; xd->above_context[i] = cm->above_context[i]; if (xd->plane[i].plane_type == PLANE_TYPE_Y) { memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); +#if CONFIG_NEW_QUANT + memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq, + sizeof(cm->y_dequant_nuq)); +#endif } else { memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); +#if CONFIG_NEW_QUANT + memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq, + sizeof(cm->uv_dequant_nuq)); +#endif } xd->fc = cm->fc; } diff --git a/vp10/common/quant_common.c b/vp10/common/quant_common.c index b1fb34dc2..f5886be84 100644 --- a/vp10/common/quant_common.c +++ b/vp10/common/quant_common.c @@ -34,18 +34,18 @@ static const uint8_t nuq_knots_lossless[COEF_BANDS][NUQ_KNOTS] = { // TODO(sarahparker) add multiple quantization profiles static const uint8_t nuq_knots[COEF_BANDS][NUQ_KNOTS] = { - {86, 122, 134}, // dc, band 0 + {91, 133, 139}, // dc, band 0 {78, 122, 134}, // band 1 - {78, 122, 134}, // band 2 - {84, 122, 133}, // band 3 - {88, 122, 134}, // band 4 - {88, 122, 134}, // band 5 + {83, 127, 139}, // band 2 + {84, 117, 128}, // band 3 + {88, 117, 129}, // band 4 + {93, 122, 134}, // band 5 }; // dequantization offsets static const uint8_t nuq_doff_lossless[COEF_BANDS] = {0, 0, 0, 0, 0, 0}; -static const uint8_t nuq_doff[COEF_BANDS] = {8, 15, 16, 22, 23, 24}; +static const uint8_t nuq_doff[COEF_BANDS] = {11, 12, 22, 18, 20, 21}; static const uint8_t *get_nuq_knots(int lossless, int band) { if (lossless) diff --git 
a/vp10/common/quant_common.h b/vp10/common/quant_common.h index 5be07931d..ebb82e8db 100644 --- a/vp10/common/quant_common.h +++ b/vp10/common/quant_common.h @@ -31,6 +31,8 @@ int vp10_get_qindex(const struct segmentation *seg, int segment_id, #if CONFIG_NEW_QUANT #define NUQ_KNOTS 3 +typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1]; +typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS]; void get_dequant_val_nuq(int q, int lossless, int band, tran_low_t *dq, tran_low_t *cumbins); tran_low_t dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq); diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl index 1e93f7d21..b30953d46 100644 --- a/vp10/common/vp10_rtcd_defs.pl +++ b/vp10/common/vp10_rtcd_defs.pl @@ -7,6 +7,7 @@ print <seg.enabled) { int i; for (i = 0; i < MAX_SEGMENTS; ++i) { @@ -2057,6 +2060,16 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) { cm->bit_depth); cm->uv_dequant[i][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); +#if CONFIG_NEW_QUANT + for (b = 0; b < COEF_BANDS; ++b) { + get_dequant_val_nuq( + cm->y_dequant[i][b != 0], qindex == 0, b, + cm->y_dequant_nuq[i][b], NULL); + get_dequant_val_nuq( + cm->uv_dequant[i][b != 0], qindex == 0, b, + cm->uv_dequant_nuq[i][b], NULL); + } +#endif } } else { const int qindex = cm->base_qindex; @@ -2068,6 +2081,16 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) { cm->bit_depth); cm->uv_dequant[0][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); +#if CONFIG_NEW_QUANT + for (b = 0; b < COEF_BANDS; ++b) { + get_dequant_val_nuq( + cm->y_dequant[0][b != 0], qindex == 0, b, + cm->y_dequant_nuq[0][b], NULL); + get_dequant_val_nuq( + cm->uv_dequant[0][b != 0], qindex == 0, b, + cm->uv_dequant_nuq[0][b], NULL); + } +#endif } } @@ -3057,7 +3080,6 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, RefCntBuffer *const frame_bufs = pool->frame_bufs; int i, mask, ref_index = 0; size_t sz; - #if CONFIG_EXT_REFS cm->last3_frame_type = cm->last2_frame_type; cm->last2_frame_type = cm->last_frame_type; diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index 953af567c..000b30b36 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c @@ -49,6 +49,9 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq, +#if CONFIG_NEW_QUANT + dequant_val_type_nuq *dq_val, +#endif // CONFIG_NEW_QUANT int ctx, const int16_t *scan, const int16_t *nb, vp10_reader *r) { FRAME_COUNTS *counts = xd->counts; @@ -66,6 +69,9 @@ static int decode_coefs(const MACROBLOCKD *xd, int dq_shift; int v, token; int16_t dqv = dq[0]; +#if CONFIG_NEW_QUANT + const tran_low_t *dqv_val = &dq_val[0][0]; +#endif // CONFIG_NEW_QUANT const uint8_t *cat1_prob; const uint8_t *cat2_prob; const uint8_t *cat3_prob; @@ -125,6 +131,10 @@ static int decode_coefs(const MACROBLOCKD *xd, break; } +#if CONFIG_NEW_QUANT + dqv_val = &dq_val[band][0]; +#endif // CONFIG_NEW_QUANT + while (!vp10_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); dqv = dq[1]; @@ -135,6 +145,9 @@ static int decode_coefs(const MACROBLOCKD *xd, ctx = get_coef_context(nb, token_cache, c); band = *band_translate++; prob = coef_probs[band][ctx]; +#if CONFIG_NEW_QUANT + dqv_val = &dq_val[band][0]; +#endif // CONFIG_NEW_QUANT } if (!vp10_read(r, prob[ONE_CONTEXT_NODE])) { @@ -191,7 +204,13 @@ static int decode_coefs(const MACROBLOCKD *xd, } } } +#if CONFIG_NEW_QUANT + v = dequant_abscoeff_nuq(val, dqv, dqv_val); + v = 
dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v; +#else v = (val * dqv) >> dq_shift; +#endif // CONFIG_NEW_QUANT + #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH dqcoeff[scan[c]] = highbd_check_range((vp10_read_bit(r) ? -v : v), @@ -224,6 +243,9 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq, +#if CONFIG_NEW_QUANT + dequant_val_type_nuq *dq_val, +#endif // CONFIG_NEW_QUANT int ctx, const int16_t *scan, const int16_t *nb, struct AnsDecoder *const ans) { FRAME_COUNTS *counts = xd->counts; @@ -245,6 +267,9 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, int dq_shift; int v, token; int16_t dqv = dq[0]; +#if CONFIG_NEW_QUANT + const tran_low_t *dqv_val = &dq_val[0][0]; +#endif // CONFIG_NEW_QUANT const uint8_t *cat1_prob; const uint8_t *cat2_prob; const uint8_t *cat3_prob; @@ -306,6 +331,10 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, } } +#if CONFIG_NEW_QUANT + dqv_val = &dq_val[band][0]; +#endif // CONFIG_NEW_QUANT + cdf = &coef_cdfs[band][ctx]; token = ZERO_TOKEN + rans_read(ans, *cdf); if (token == ZERO_TOKEN) { @@ -359,7 +388,13 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, #endif } break; } - v = (val * dqv) >> dq_shift; +#if CONFIG_NEW_QUANT + v = dequant_abscoeff_nuq(val, dqv, dqv_val); + v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v; +#else + v = (val * dqv) >> dq_shift; +#endif // CONFIG_NEW_QUANT + #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH dqcoeff[scan[c]] = @@ -474,11 +509,19 @@ int vp10_decode_block_tokens(MACROBLOCKD *const xd, #if !CONFIG_ANS const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, - dequant, ctx, sc->scan, sc->neighbors, r); + dequant, +#if CONFIG_NEW_QUANT + pd->seg_dequant_nuq[0], +#endif // CONFIG_NEW_QUANT + ctx, sc->scan, sc->neighbors, r); #else const int eob = decode_coefs_ans(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, - dequant, ctx, sc->scan, sc->neighbors, r); + dequant, +#if CONFIG_NEW_QUANT + pd->seg_dequant_nuq[0], +#endif // CONFIG_NEW_QUANT + ctx, sc->scan, sc->neighbors, r); #endif // !CONFIG_ANS dec_set_contexts(xd, pd, tx_size, eob > 0, x, y); return eob; diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index d4adf0de5..6606e59cb 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -41,6 +41,9 @@ typedef struct macroblock_plane { int16_t *quant_shift; int16_t *zbin; int16_t *round; +#if CONFIG_NEW_QUANT + cuml_bins_type_nuq *cuml_bins_nuq; +#endif // CONFIG_NEW_QUANT int64_t quant_thred[2]; } MACROBLOCK_PLANE; diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index b9412cc5a..dfb72ea94 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -107,6 +107,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, const int default_eob = 16 << (tx_size << 1); int mul; const int16_t *dequant_ptr = pd->dequant; +#if CONFIG_NEW_QUANT + const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq; +#endif // CONFIG_NEW_QUANT const uint8_t *const band_translate = get_band_translate(tx_size); TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); const scan_order *const so = @@ -121,6 +124,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, int16_t t0, t1; EXTRABIT e0; int best, band, pt, i, final_eob; + int shift = get_tx_scale(xd, tx_type, tx_size); #if CONFIG_VP9_HIGHBITDEPTH const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd); #else @@ -129,7 +133,7 @@ static int 
optimize_b(MACROBLOCK *mb, int plane, int block, assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); - mul = 1 << get_tx_scale(xd, tx_type, tx_size); + mul = 1 << shift; /* Now set up a Viterbi trellis to evaluate alternative roundings. */ /* Initialize the sentinel node of the trellis. */ @@ -188,12 +192,23 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; +#if CONFIG_NEW_QUANT + shortcut = ( + (dequant_abscoeff_nuq( + abs(x), dequant_ptr[rc != 0], + dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) && + (dequant_abscoeff_nuq( + abs(x) - 1, dequant_ptr[rc != 0], + dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul)); +#else // CONFIG_NEW_QUANT + if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + dequant_ptr[rc != 0])) shortcut = 1; else shortcut = 0; +#endif // CONFIG_NEW_QUANT if (shortcut) { sz = -(x < 0); @@ -232,6 +247,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, base_bits = vp10_get_cost(t0, e0, cat6_high_cost); if (shortcut) { +#if CONFIG_NEW_QUANT + dx = dequant_coeff_nuq( + x, dequant_ptr[rc != 0], + dequant_val[band_translate[i]]) - coeff[rc] * mul; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + dx >>= xd->bd - 8; + } +#endif // CONFIG_VP9_HIGHBITDEPTH +#else // CONFIG_NEW_QUANT #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz; @@ -241,6 +266,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, #else dx -= (dequant_ptr[rc != 0] + sz) ^ sz; #endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_NEW_QUANT d2 = dx * dx; } tokens[i][1].rate = base_bits + (best ? rate1 : rate0); @@ -295,9 +321,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, if (x) { final_eob = i; } - qcoeff[rc] = x; +#if CONFIG_NEW_QUANT + dqcoeff[rc] = dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0], + dequant_val[band_translate[i]]); + if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift); + if (x < 0) dqcoeff[rc] = -dqcoeff[rc]; +#else dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; +#endif // CONFIG_NEW_QUANT next = tokens[i][best].next; best = best_index[i][best]; @@ -401,6 +433,469 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, } } +#if CONFIG_NEW_QUANT +void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int16_t *src_diff; + const uint8_t* band = get_band_translate(tx_size); + + FWD_TXFM_PARAM fwd_txfm_param; + + fwd_txfm_param.tx_type = tx_type; + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_B]; + fwd_txfm_param.rd_transform = x->use_lp32x32fdct; + fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; + +// TODO(sarahparker) add all of these new quant quantize functions +// to quant_func_list, just trying to get this expr to work for now +#if CONFIG_VP9_HIGHBITDEPTH + fwd_txfm_param.bd = xd->bd; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_16X16: + highbd_quantize_nuq(coeff, 256, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_8X8: + highbd_quantize_nuq(coeff, 64, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_4X4: + highbd_quantize_nuq(coeff, 16, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + quantize_32x32_nuq(coeff, 1024, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_16X16: + quantize_nuq(coeff, 256, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *)pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_8X8: + quantize_nuq(coeff, 64, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *)pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_4X4: + quantize_nuq(coeff, 16, x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq, + (const dequant_val_type_nuq *)pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + 
scan_order->scan, band); + break; + default: + assert(0); + break; + } +} + +void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int16_t *src_diff; + const uint8_t* band = get_band_translate(tx_size); + + FWD_TXFM_PARAM fwd_txfm_param; + + fwd_txfm_param.tx_type = tx_type; + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP]; + fwd_txfm_param.rd_transform = x->use_lp32x32fdct; + fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; + +// TODO(sarahparker) add all of these new quant quantize functions +// to quant_func_list, just trying to get this expr to work for now +#if CONFIG_VP9_HIGHBITDEPTH + fwd_txfm_param.bd = xd->bd; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_16X16: + highbd_quantize_fp_nuq(coeff, 256, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_8X8: + highbd_quantize_fp_nuq(coeff, 64, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_4X4: + highbd_quantize_fp_nuq(coeff, 16, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + quantize_32x32_fp_nuq(coeff, 1024, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_16X16: + quantize_fp_nuq(coeff, 256, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_8X8: + quantize_fp_nuq(coeff, 64, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq 
*) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + case TX_4X4: + quantize_fp_nuq(coeff, 16, x->skip_block, + p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *) + p->cuml_bins_nuq, + (const dequant_val_type_nuq *) + pd->dequant_val_nuq, + qcoeff, dqcoeff, eob, + scan_order->scan, band); + break; + default: + assert(0); + break; + } +} + +void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int16_t *src_diff; + + FWD_TXFM_PARAM fwd_txfm_param; + + fwd_txfm_param.tx_type = tx_type; + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC]; + fwd_txfm_param.rd_transform = x->use_lp32x32fdct; + fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; + +// TODO(sarahparker) add all of these new quant quantize functions +// to quant_func_list, just trying to get this expr to work for now +#if CONFIG_VP9_HIGHBITDEPTH + fwd_txfm_param.bd = xd->bd; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + highbd_quantize_dc_32x32_nuq(coeff, 1024, x->skip_block, + p->quant[0], p->quant_shift[0], + pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_16X16: + highbd_quantize_dc_nuq(coeff, 256, x->skip_block, + p->quant[0], p->quant_shift[0], + pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_8X8: + highbd_quantize_dc_nuq(coeff, 64, x->skip_block, + p->quant[0], p->quant_shift[0], + pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_4X4: + highbd_quantize_dc_nuq(coeff, 16, x->skip_block, + p->quant[0], p->quant_shift[0], + pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + quantize_dc_32x32_nuq(coeff, 1024, x->skip_block, + p->quant[0], p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_16X16: + quantize_dc_nuq(coeff, 256, x->skip_block, + p->quant[0], p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_8X8: + quantize_dc_nuq(coeff, 64, x->skip_block, + p->quant[0], p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_4X4: + quantize_dc_nuq(coeff, 16, x->skip_block, + p->quant[0], 
p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + default: + assert(0); + break; + } +} + +void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block, + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int16_t *src_diff; + + FWD_TXFM_PARAM fwd_txfm_param; + + fwd_txfm_param.tx_type = tx_type; + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC]; + fwd_txfm_param.rd_transform = x->use_lp32x32fdct; + fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; + +// TODO(sarahparker) add all of these new quant quantize functions +// to quant_func_list, just trying to get this expr to work for now +#if CONFIG_VP9_HIGHBITDEPTH + fwd_txfm_param.bd = xd->bd; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + highbd_quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_16X16: + highbd_quantize_dc_fp_nuq(coeff, 256, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_8X8: + highbd_quantize_dc_fp_nuq(coeff, 64, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_4X4: + highbd_quantize_dc_fp_nuq(coeff, 16, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + switch (tx_size) { + case TX_32X32: + quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_16X16: + quantize_dc_fp_nuq(coeff, 256, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + + break; + case TX_8X8: + quantize_dc_fp_nuq(coeff, 64, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + case TX_4X4: + quantize_dc_fp_nuq(coeff, 16, x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[0], + pd->dequant_val_nuq[0], + qcoeff, dqcoeff, eob); + break; + default: + assert(0); + break; + } +} +#endif // CONFIG_NEW_QUANT + static void encode_block(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { @@ -448,20 +943,35 @@ static void encode_block(int plane, int block, int 
blk_row, int blk_col, *a = *l = 0; return; } else { +#if CONFIG_NEW_QUANT + vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_FP); +#endif } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int blk_index = (block >> (tx_size << 1)); if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization +#if CONFIG_NEW_QUANT + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT } else if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization +#if CONFIG_NEW_QUANT + vp10_xform_quant_dc_nuq(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_DC); +#endif // CONFIG_NEW_QUANT } else { // skip forward transform p->eobs[block] = 0; @@ -471,8 +981,13 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, #endif } } else { +#if CONFIG_NEW_QUANT + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT } } } @@ -603,8 +1118,13 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, uint8_t *dst; dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; +#if CONFIG_NEW_QUANT + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH @@ -733,7 +1253,6 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - const int tx1d_size = get_tx1d_size(tx_size); INV_TXFM_PARAM inv_txfm_param; @@ -758,8 +1277,13 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, src_stride, dst, dst_stride); #endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_NEW_QUANT + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else // CONFIG_NEW_QUANT vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT if (args->ctx != NULL) { struct optimize_ctx *const ctx = args->ctx; diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h index cbe15aadf..eae1db799 100644 --- a/vp10/encoder/encodemb.h +++ b/vp10/encoder/encodemb.h @@ -41,6 +41,20 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, VP10_XFORM_QUANT xform_quant_idx); +#if CONFIG_NEW_QUANT +void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size); +void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size); +void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size); +void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block, 
+ int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size); +#endif void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 7b7bd7d52..4878c0052 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -336,6 +336,12 @@ typedef struct VP10_COMP { MB_MODE_INFO_EXT *mbmi_ext_base; DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width +#if CONFIG_NEW_QUANT + DECLARE_ALIGNED(16, dequant_val_type_nuq, + y_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]); + DECLARE_ALIGNED(16, dequant_val_type_nuq, + uv_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]); +#endif // CONFIG_NEW_QUANT VP10_COMMON common; VP10EncoderConfig oxcf; struct lookahead_ctx *lookahead; diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c index 2c61de5bd..2a8b33f6c 100644 --- a/vp10/encoder/quantize.c +++ b/vp10/encoder/quantize.c @@ -22,6 +22,405 @@ #include "vp10/encoder/quantize.h" #include "vp10/encoder/rd.h" +#if CONFIG_NEW_QUANT +static INLINE int quantize_coeff_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < cuml_bins_ptr[i]) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + tmp -= cuml_bins_ptr[NUQ_KNOTS - 1]; + q = NUQ_KNOTS + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16); + } + if (q) { + *dqcoeff_ptr = + dequant_abscoeff_nuq(q, dequant, dequant_val); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int quantize_coeff_bigtx_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + int logsizeby32) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32); + q = NUQ_KNOTS + + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32)); + } + if (q) { + *dqcoeff_ptr = + ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val), + 1 + logsizeby32); + // *dqcoeff_ptr = dequant_abscoeff_nuq(q, dequant, dequant_val) >> + // (1 + logsizeby32); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int quantize_coeff_fp_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < cuml_bins_ptr[i]) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + q = NUQ_KNOTS + + ((((int64_t)tmp - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16); + } + if (q) { + *dqcoeff_ptr = + dequant_abscoeff_nuq(q, dequant, dequant_val); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int quantize_coeff_bigtx_fp_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + int logsizeby32) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + q = NUQ_KNOTS + + ((((int64_t)tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], + 1 + logsizeby32)) * quant) >> + (15 - logsizeby32)); + } + if (q) { + *dqcoeff_ptr = + ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val), + 1 + logsizeby32); + // *dqcoeff_ptr = dequant_abscoeff_nuq(q, dequant, dequant_val) >> + // (1 + logsizeby32); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +void quantize_dc_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_nuq(coeff_ptr[rc], + quant, + quant_shift, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_fp_nuq(coeff_ptr[rc], + quant, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], + quant, + quant_shift, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr, + 0)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], + quant, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr, + 0)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void quantize_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc])) + eob = i; + } + } + 
*eob_ptr = eob + 1; +} + +void quantize_fp_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_fp_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc])) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], + 0)) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], + 0)) + eob = i; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_NEW_QUANT + void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -131,6 +530,403 @@ void vp10_highbd_quantize_dc_facade( p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr, qparam->log_scale); } + +#if CONFIG_NEW_QUANT +static INLINE int highbd_quantize_coeff_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < cuml_bins_ptr[i]) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + tmp -= cuml_bins_ptr[NUQ_KNOTS - 1]; + q = NUQ_KNOTS + (((((tmp * 
quant) >> 16) + tmp) * quant_shift) >> 16); + } + if (q) { + *dqcoeff_ptr = + dequant_abscoeff_nuq(q, dequant, dequant_val); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int highbd_quantize_coeff_fp_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < cuml_bins_ptr[i]) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + q = NUQ_KNOTS + + (((tmp - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16); + } + if (q) { + *dqcoeff_ptr = + dequant_abscoeff_nuq(q, dequant, dequant_val); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int highbd_quantize_coeff_bigtx_fp_nuq( + const tran_low_t coeffv, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + int logsizeby32) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + q = NUQ_KNOTS + + (((tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], + 1 + logsizeby32)) * quant) >> + (15 - logsizeby32)); + } + if (q) { + *dqcoeff_ptr = + ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val), + 1 + logsizeby32); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +static INLINE int highbd_quantize_coeff_bigtx_nuq(const tran_low_t coeffv, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t + *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + int logsizeby32) { + const int coeff = coeffv; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int i, q; + int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); + for (i = 0; i < NUQ_KNOTS; i++) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + q = i; + break; + } + } + if (i == NUQ_KNOTS) { + tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32); + q = NUQ_KNOTS + + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32)); + } + if (q) { + *dqcoeff_ptr = + ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val), + 1 + logsizeby32); + *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; + *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; + } else { + *qcoeff_ptr = 0; + *dqcoeff_ptr = 0; + } + return (q != 0); +} + +void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_nuq(coeff_ptr[rc], + quant, + quant_shift, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc], + quant, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc])) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], + 0)) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t 
*band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], + 0)) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t *quant_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, + const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc], + quant_ptr[rc != 0], + dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], + dequant_val[band[i]], + &qcoeff_ptr[rc], + &dqcoeff_ptr[rc])) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], + quant, + quant_shift, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr, + 0)) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], + quant, + dequant, + cuml_bins_ptr, + dequant_val, + qcoeff_ptr, + dqcoeff_ptr, + 0)) + eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_NEW_QUANT #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -186,7 +982,7 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan, int log_scale) { + const int16_t *iscan, const int log_scale) { int i; int eob = -1; const int scale = 1 << log_scale; @@ -219,7 +1015,8 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, } *eob_ptr = eob + 1; } -#endif + +#endif // CONFIG_VP9_HIGHBITDEPTH // TODO(jingning) Refactor this file and combine functions with similar // operations. 
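/*
 * Editor's note (not part of the patch): a minimal, compile-alone sketch of
 * the non-uniform binning performed by quantize_coeff_nuq() and its variants
 * above.  All names prefixed with sketch_ and all numeric values are invented
 * for illustration; the real boundaries come from get_dequant_val_nuq() in
 * quant_common.c, and the real tail quantization uses the fixed-point
 * quant/quant_shift multiplies from the patch rather than the plain division
 * used here.  The point it shows: the first NUQ_KNOTS bins have hand-tuned
 * widths (the cumulative bins), and coefficients beyond the last knot fall
 * back to uniform quantization of the remainder.
 */
#include <stdio.h>

#define SKETCH_NUQ_KNOTS 3

/* Map |coeff| to a quantization index using cumulative bin boundaries. */
static int sketch_quantize_nuq(int abs_coeff,
                               const int cuml_bins[SKETCH_NUQ_KNOTS],
                               int dequant_step) {
  int i;
  for (i = 0; i < SKETCH_NUQ_KNOTS; i++)
    if (abs_coeff < cuml_bins[i]) return i;  /* inside one of the knot bins */
  /* Past the knots: uniform quantization of the remainder (simplified). */
  return SKETCH_NUQ_KNOTS +
         (abs_coeff - cuml_bins[SKETCH_NUQ_KNOTS - 1]) / dequant_step;
}

int main(void) {
  /* Hypothetical bin boundaries and step size, not the tables in the patch. */
  const int cuml_bins[SKETCH_NUQ_KNOTS] = { 24, 56, 88 };
  const int dequant_step = 32;
  int c;
  for (c = 0; c <= 160; c += 20)
    printf("|coeff| = %3d -> q index %d\n", c,
           sketch_quantize_nuq(c, cuml_bins, dequant_step));
  return 0;
}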
@@ -272,7 +1069,7 @@ void vp10_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan, int log_scale) { + const int16_t *iscan, const int log_scale) { int i, non_zero_count = (int)n_coeffs, eob = -1; int zbins[2] = {zbin_ptr[0], zbin_ptr[1]}; int round[2] = {round_ptr[0], round_ptr[1]}; @@ -452,6 +1249,20 @@ void vp10_init_quantizer(VP10_COMP *cpi) { cpi->uv_dequant[q][i] = quant; } +#if CONFIG_NEW_QUANT + // TODO(sarahparker) do this for multiple profiles once they are added + for (i = 0; i < COEF_BANDS; i++) { + const int quant = cpi->y_dequant[q][i != 0]; + const int uvquant = cpi->uv_dequant[q][i != 0]; + get_dequant_val_nuq(quant, q == 0, i, + cpi->y_dequant_val_nuq[q][i], + quants->y_cuml_bins_nuq[q][i]); + get_dequant_val_nuq(uvquant, q == 0, i, + cpi->uv_dequant_val_nuq[q][i], + quants->uv_cuml_bins_nuq[q][i]); + } +#endif // CONFIG_NEW_QUANT + for (i = 2; i < 8; i++) { // 8: SIMD width quants->y_quant[q][i] = quants->y_quant[q][1]; quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; @@ -489,6 +1300,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) { x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; xd->plane[0].dequant = cpi->y_dequant[qindex]; +#if CONFIG_NEW_QUANT + x->plane[0].cuml_bins_nuq = quants->y_cuml_bins_nuq[qindex]; + xd->plane[0].dequant_val_nuq = (const dequant_val_type_nuq*) + cpi->y_dequant_val_nuq[qindex]; +#endif // CONFIG_NEW_QUANT + x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1]; @@ -502,6 +1319,11 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) { x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; xd->plane[i].dequant = cpi->uv_dequant[qindex]; +#if CONFIG_NEW_QUANT + x->plane[i].cuml_bins_nuq = quants->uv_cuml_bins_nuq[qindex]; + xd->plane[i].dequant_val_nuq = (const dequant_val_type_nuq*) + cpi->uv_dequant_val_nuq[qindex]; +#endif // CONFIG_NEW_QUANT x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; diff --git a/vp10/encoder/quantize.h b/vp10/encoder/quantize.h index 5e62eb25e..dd10528b3 100644 --- a/vp10/encoder/quantize.h +++ b/vp10/encoder/quantize.h @@ -32,6 +32,14 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr, const QUANT_PARAM *qparam); typedef struct { +#if CONFIG_NEW_QUANT + DECLARE_ALIGNED(16, tran_low_t, + y_cuml_bins_nuq[QINDEX_RANGE][COEF_BANDS] + [NUQ_KNOTS]); + DECLARE_ALIGNED(16, tran_low_t, + uv_cuml_bins_nuq[QINDEX_RANGE][COEF_BANDS] + [NUQ_KNOTS]); +#endif // CONFIG_NEW_QUANT // 0: dc 1: ac 2-8: ac repeated to SIMD width DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); @@ -88,6 +96,52 @@ void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCKD_PLANE *pd, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc, const QUANT_PARAM *qparam); + +#if CONFIG_NEW_QUANT +void quantize_dc_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t 
*eob_ptr); +void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +#endif // CONFIG_NEW_QUANT + #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_quantize_fp_facade( const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, @@ -115,6 +169,51 @@ void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, const int log_scale); +#if CONFIG_NEW_QUANT +void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +void highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t quant_shift, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +void highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, + int skip_block, + const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); + +#endif // CONFIG_NEW_QUANT #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 8c4b2020a..640a409c9 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1259,8 +1259,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, if (x->skip_txfm[plane][block >> (tx_size << 1)] == SKIP_TXFM_NONE) { // full forward transform and quantization +#if CONFIG_NEW_QUANT + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist, &sse); } else if (x->skip_txfm[plane][block >> (tx_size << 1)] == @@ -1268,8 +1273,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = 
BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); +#if CONFIG_NEW_QUANT + if (x->quant_fp) + vp10_xform_quant_dc_fp_nuq(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); + else + vp10_xform_quant_dc_nuq(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_DC); +#endif // CONFIG_NEW_QUANT sse = x->bsse[plane][block >> (tx_size << 1)] << 4; dist = sse; if (x->plane[plane].eobs[block]) { @@ -1295,8 +1309,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, } } else { // full forward transform and quantization +#if CONFIG_NEW_QUANT + if (x->quant_fp) + vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); + else + vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize, + tx_size); +#else vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, VP10_XFORM_QUANT_B); +#endif // CONFIG_NEW_QUANT dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist, &sse); } diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c index 3f411b77e..bd0cb8189 100644 --- a/vp10/encoder/speed_features.c +++ b/vp10/encoder/speed_features.c @@ -239,7 +239,6 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi, SPEED_FEATURES *sf, int speed) { VP10_COMMON *const cm = &cpi->common; - if (speed >= 1) { if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT @@ -309,6 +308,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } + if (speed >= 2) { sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
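/*
 * Editor's note (not part of the patch): a self-contained sketch of the
 * decoder-side reconstruction this change adds in detokenize.c, i.e.
 *   v = dequant_abscoeff_nuq(val, dqv, dqv_val);
 *   v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
 * The reconstruction levels below are invented illustration values; the real
 * ones are filled per band by get_dequant_val_nuq() in quant_common.c.  The
 * helper assumes dequant_abscoeff_nuq() reads the first NUQ_KNOTS + 1 levels
 * from the per-band table and extends with a uniform step of `dequant` beyond
 * the last knot (its definition lives in quant_common.c and is not shown in
 * this diff), mirroring how the quantizer side builds its bins.
 */
#include <stdio.h>

#define SKETCH_NUQ_KNOTS 3
#define SKETCH_ROUND_POWER_OF_TWO(value, n) \
  (((value) + (1 << ((n) - 1))) >> (n))

static int sketch_dequant_abscoeff_nuq(int q_index, int dequant,
                                       const int dq_val[SKETCH_NUQ_KNOTS + 1]) {
  if (q_index <= SKETCH_NUQ_KNOTS) return dq_val[q_index];
  return dq_val[SKETCH_NUQ_KNOTS] + (q_index - SKETCH_NUQ_KNOTS) * dequant;
}

int main(void) {
  const int dq_val[SKETCH_NUQ_KNOTS + 1] = { 0, 34, 66, 98 }; /* toy levels */
  const int dequant = 32; /* toy AC dequant step */
  const int dq_shift = 1; /* toy scale shift; the patch uses get_tx_scale() */
  int q_index;
  for (q_index = 0; q_index <= 6; q_index++) {
    int v = sketch_dequant_abscoeff_nuq(q_index, dequant, dq_val);
    v = dq_shift ? SKETCH_ROUND_POWER_OF_TWO(v, dq_shift) : v;
    printf("q index %d -> reconstructed |coeff| %d\n", q_index, v);
  }
  return 0;
}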