Non-uniform quantization experiment

This framework allows the lower quantization bins to be shrunk or expanded to match the source distribution more closely (assuming a generalized Gaussian-like, centrally peaked model for the coefficients) in an entropy-constrained sense. Specifically, the widths of bins 0-4 are modified as a factor of the nominal quantization step size, and from bin 5 onwards all bins have the nominal quantization step size. Further, different bin width profiles as well as reconstruction values can be used based on the coefficient band as well as the quantization step size, which is divided into 5 ranges. A small gain of about 0.16% is currently observed on derflr with the same parameters for all q values. Optimizing the parameters based on the qstep value is left as a TODO for now. Results on derflr with all experiments on are +6.08% (up from 5.88%). Experiments are in progress to tune the parameters for different coefficient bands and quantization step ranges. Change-Id: I88429d8cb0777021bfbb689ef69b764eafb3a1de
2015-03-04 14:04:11 -08:00 · 2015-03-04 14:04:11 -08:00 · c8ed36432e
commit c8ed36432e
parent 9a92891ac4
14 changed files with 2862 additions and 49 deletions
--- a/1
+++ b/1
@ -294,6 +294,7 @@ EXPERIMENT_LIST="
    global_motion
    palette
    newmvref_sub8x8
+    new_quant
 "
 CONFIG_LIST="
    external_build
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@ -20,6 +20,7 @@
 #include "vp9/common/vp9_common_data.h"
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_mv.h"
+#include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_scale.h"

 #ifdef __cplusplus
@ -306,6 +307,9 @@ struct macroblockd_plane {
  struct buf_2d dst;
  struct buf_2d pre[2];
  const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq *dequant_val_nuq;
+#endif
  ENTROPY_CONTEXT *above_context;
  ENTROPY_CONTEXT *left_context;
 #if CONFIG_PALETTE
@ -548,6 +552,20 @@ static inline int get_wedge_bits(BLOCK_SIZE sb_type) {
 }
 #endif  // CONFIG_WEDGE_PARTITION

+#if CONFIG_NEW_QUANT && CONFIG_TX_SKIP
+// Returns 1 when the tx_skip flag for the plane class (index 0 for plane 0,
+// index 1 for any other plane) is set and that plane class is predicted with
+// V_PRED, H_PRED or TM_PRED; returns 0 otherwise.  Plane 0 is checked against
+// mbmi->mode, every other plane against mbmi->uv_mode.
+static inline int is_rect_quant_used(const MB_MODE_INFO *mbmi,
+                                     int plane) {
+  const int is_nonzero_plane = (plane != 0);
+  const int pred_mode = is_nonzero_plane ? mbmi->uv_mode : mbmi->mode;
+
+  if (!mbmi->tx_skip[is_nonzero_plane])
+    return 0;
+
+  switch (pred_mode) {
+    case V_PRED:
+    case H_PRED:
+    case TM_PRED:
+      return 1;
+    default:
+      return 0;
+  }
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@ -65,9 +65,14 @@ typedef struct {

 typedef struct VP9Common {
  struct vpx_internal_error_info  error;
-
  DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
  DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  y_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  uv_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+#endif  // CONFIG_NEW_QUANT

  vpx_color_space_t color_space;

--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@ -8,10 +8,145 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <stdio.h>
+#include <math.h>
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_seg_common.h"

+#if CONFIG_NEW_QUANT
+// Bin widths expressed as a fraction over 128 of the quant stepsize,
+// for the quantization bins 0-4.
+// So a value x indicates the bin is actually factor x/128 of the
+// nominal quantization step.  For the zero bin, the width is only
+// for one side of zero, so the actual width is twice that.
+// There are five sets of values for 5 different quantizer ranges.
+//
+// TODO(debargha): Optimize these tables
+static const uint8_t vp9_nuq_knotes_tiny[COEF_BANDS][NUQ_KNOTES] = {  // quant <= 64 << (bd - 8)
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_low[COEF_BANDS][NUQ_KNOTES] = {  // 64 << (bd - 8) < quant <= 128 << (bd - 8)
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_mid[COEF_BANDS][NUQ_KNOTES] = {  // 128 << (bd - 8) < quant <= 256 << (bd - 8)
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_high[COEF_BANDS][NUQ_KNOTES] = {  // 256 << (bd - 8) < quant <= 512 << (bd - 8)
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_huge[COEF_BANDS][NUQ_KNOTES] = {  // quant > 512 << (bd - 8)
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};  // NOTE: the five knotes profiles above currently hold identical values.
+
+static const uint8_t vp9_nuq_doff_tiny[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };  // quant <= 64 << (bd - 8)
+static const uint8_t vp9_nuq_doff_low[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };  // quant <= 128 << (bd - 8)
+static const uint8_t vp9_nuq_doff_mid[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };  // quant <= 256 << (bd - 8)
+static const uint8_t vp9_nuq_doff_high[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };  // quant <= 512 << (bd - 8)
+static const uint8_t vp9_nuq_doff_huge[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };  // quant > 512 << (bd - 8); all five doff rows are currently identical
+
+// Allow different quantization profiles in different q ranges,
+// to enable entropy-constraints in scalar quantization.
+
+// Returns the row of NUQ_KNOTES bin-width factors (in 1/128 units of the
+// step size) for coefficient band |band|.  The profile is chosen by comparing
+// |quant| against the thresholds 64, 128, 256 and 512, each scaled up by
+// (bd - 8) bits.
+static const uint8_t *get_nuq_knotes(int16_t quant, int band, int bd) {
+  const int scale_bits = bd - 8;
+
+  if (quant <= (64 << scale_bits))
+    return vp9_nuq_knotes_tiny[band];
+  if (quant <= (128 << scale_bits))
+    return vp9_nuq_knotes_low[band];
+  if (quant <= (256 << scale_bits))
+    return vp9_nuq_knotes_mid[band];
+  if (quant <= (512 << scale_bits))
+    return vp9_nuq_knotes_high[band];
+  return vp9_nuq_knotes_huge[band];
+}
+
+// Looks up the reconstruction offset for coefficient band |band|.  The
+// offset table is chosen by comparing |quant| against the thresholds 64,
+// 128, 256 and 512, each scaled up by (bd - 8) bits.
+static INLINE int16_t quant_to_doff_fixed(int16_t quant, int band, int bd) {
+  const int scale_bits = bd - 8;
+  const uint8_t *doff_table;
+
+  if (quant <= (64 << scale_bits))
+    doff_table = vp9_nuq_doff_tiny;
+  else if (quant <= (128 << scale_bits))
+    doff_table = vp9_nuq_doff_low;
+  else if (quant <= (256 << scale_bits))
+    doff_table = vp9_nuq_doff_mid;
+  else if (quant <= (512 << scale_bits))
+    doff_table = vp9_nuq_doff_high;
+  else
+    doff_table = vp9_nuq_doff_huge;
+
+  return doff_table[band];
+}
+
+// Fills cumbins[0..NUQ_KNOTES-1] with the cumulative bin widths for step size
+// |q|: entry k is the sum of the first k + 1 bin-width factors, multiplied by
+// |q| and divided by 128 with rounding.
+static INLINE void get_cumbins_nuq(int q, int band, int bd,
+                                   tran_low_t *cumbins) {
+  const uint8_t *const widths = get_nuq_knotes(q, band, bd);
+  int16_t running_width = 0;
+  int k;
+
+  for (k = 0; k < NUQ_KNOTES; ++k) {
+    running_width += widths[k];
+    cumbins[k] = (running_width * q + 64) >> 7;
+  }
+}
+
+// Computes the reconstruction values dq[0..NUQ_KNOTES] for step size |q| and
+// coefficient band |band|.
+//   dq[0] is 0.
+//   dq[i], 1 <= i < NUQ_KNOTES, is the cumulative width of bins 0..i-1 plus
+//     half the width of bin i, minus the band offset rescaled by the ratio of
+//     that bin's width to |q|.
+//   dq[NUQ_KNOTES] is the cumulative width of all NUQ_KNOTES bins plus
+//     (64 - offset) / 128 of |q|.
+// The cumulative bin widths are also written to |cumbins| when it is
+// non-null; otherwise a local scratch array is used.
+void vp9_get_dequant_val_nuq(int q, int band, int bd,
+                             tran_low_t *dq, tran_low_t *cumbins) {
+  const uint8_t *const widths = get_nuq_knotes(q, band, bd);
+  tran_low_t scratch_cumbins[NUQ_KNOTES];
+  tran_low_t *const edges = cumbins ? cumbins : scratch_cumbins;
+  tran_low_t offset;
+  int k;
+
+  get_cumbins_nuq(q, band, bd, edges);
+
+  dq[0] = 0;
+  for (k = 1; k < NUQ_KNOTES; ++k) {
+    // Width of bin k in coefficient units.
+    const int16_t bin_step = (widths[k] * q + 64) >> 7;
+    offset = quant_to_doff_fixed(bin_step, band, bd);
+    // Rescale the table offset by bin_step / q, rounding to nearest.
+    offset = (2 * offset * bin_step + q) / (2 * q);
+    dq[k] = edges[k - 1] + (((widths[k] - offset * 2) * q + 128) >> 8);
+  }
+
+  offset = quant_to_doff_fixed(q, band, bd);
+  dq[NUQ_KNOTES] = edges[NUQ_KNOTES - 1] + (((64 - offset) * q + 64) >> 7);
+}
+
+// Maps the quantized magnitude |v| to its reconstruction value.  Levels up to
+// NUQ_KNOTES are read straight from |dq|; each level above that adds one full
+// step |q| on top of dq[NUQ_KNOTES].
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {
+  if (v > NUQ_KNOTES)
+    return dq[NUQ_KNOTES] + (v - NUQ_KNOTES) * q;
+  return dq[v];
+}
+
+// Signed variant of vp9_dequant_abscoeff_nuq(): reconstructs abs(v) and then
+// restores the sign of |v|.
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
+  const tran_low_t magnitude = vp9_dequant_abscoeff_nuq(abs(v), q, dq);
+  if (v < 0)
+    return -magnitude;
+  return magnitude;
+}
+#endif  // CONFIG_NEW_QUANT
+
 static const int16_t dc_qlookup[QINDEX_RANGE] = {
  4,       8,    8,    9,   10,   11,   12,   12,
  13,     14,   15,   16,   17,   18,   19,   19,
@ -275,4 +410,3 @@ int vp9_get_qindex(const struct segmentation *seg, int segment_id,
    return base_qindex;
  }
 }
-
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@ -11,6 +11,8 @@
 #ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
 #define VP9_COMMON_VP9_QUANT_COMMON_H_

+#include <stdio.h>
+
 #include "vpx/vpx_codec.h"
 #include "vp9/common/vp9_seg_common.h"

@ -34,6 +36,21 @@ int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
 int vp9_get_qindex(const struct segmentation *seg, int segment_id,
                   int base_qindex);

+// Returns round_factor * quant / 128, computed with an arithmetic right
+// shift by 7 and converted back to int16_t.
+static INLINE int16_t vp9_round_factor_to_round(int16_t quant,
+                                                int16_t round_factor) {
+  const int scaled = round_factor * quant;
+  return scaled >> 7;
+}
+
+#if CONFIG_NEW_QUANT
+#define NUQ_KNOTES 5
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTES + 1];
+typedef tran_low_t cumbins_type_nuq[NUQ_KNOTES];
+void vp9_get_dequant_val_nuq(int q, int band, int bd,
+                             tran_low_t *dq, tran_low_t *cumbins);
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
+#endif  // CONFIG_NEW_QUANT
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@ -7,6 +7,7 @@ print <<EOF
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_quant_common.h"

 struct macroblockd;

@ -1402,6 +1403,27 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  }
 }

+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+  add_proto qw/void vp9_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_nuq/;
+
+  add_proto qw/void vp9_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_fp_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_fp_nuq/;
+
+  if (vpx_config("CONFIG_TX64X64") eq "yes") {
+    add_proto qw/void vp9_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_nuq/;
+
+    add_proto qw/void vp9_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_fp_nuq/;
+  }
+}
 #
 # Structured Similarity (SSIM)
 #
@ -2131,6 +2153,28 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

    add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/vp9_highbd_quantize_b_64x64/;
+
+    if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+      add_proto qw/void vp9_highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_fp_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_fp_nuq/;
+
+      if (vpx_config("CONFIG_TX64X64") eq "yes") {
+        add_proto qw/void vp9_highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_nuq/;
+
+        add_proto qw/void vp9_highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_fp_nuq/;
+      }
+    }
  }

  #
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@ -197,9 +197,18 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
 static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
  int i;
  xd->plane[0].dequant = cm->y_dequant[q_index];
+#if CONFIG_NEW_QUANT
+  xd->plane[0].dequant_val_nuq =
+      (const dequant_val_type_nuq *)cm->y_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT

-  for (i = 1; i < MAX_MB_PLANE; i++)
+  for (i = 1; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dequant = cm->uv_dequant[q_index];
+#if CONFIG_NEW_QUANT
+    xd->plane[i].dequant_val_nuq =
+        (const dequant_val_type_nuq *)cm->uv_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT
+  }
 }

 #if CONFIG_TX_SKIP
@ -2520,11 +2529,24 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
  int q;

  for (q = 0; q < QINDEX_RANGE; q++) {
+    int b;
    cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth);
    cm->y_dequant[q][1] = vp9_ac_quant(q, 0, cm->bit_depth);

    cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth);
    cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
+
+#if CONFIG_NEW_QUANT
+    for (b = 0; b < COEF_BANDS; ++b) {
+      vp9_get_dequant_val_nuq(
+          cm->y_dequant[q][b != 0], b, cm->bit_depth,
+          cm->y_dequant_val_nuq[q][b], NULL);
+      vp9_get_dequant_val_nuq(
+          cm->uv_dequant[q][b != 0], b, cm->bit_depth,
+          cm->uv_dequant_val_nuq[q][b], NULL);
+    }
+#endif  // CONFIG_NEW_QUANT
+    (void) b;
  }
 }

--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@ -54,7 +54,11 @@ static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
 };

 static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
-                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        tran_low_t *dqcoeff, TX_SIZE tx_size,
+                        const int16_t *dq,
+#if CONFIG_NEW_QUANT
+                        const dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_NEW_QUANT
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vp9_reader *r) {
  const int max_eob = 16 << (tx_size << 1);
@ -74,6 +78,12 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
  const int dq_shift = (tx_size > TX_16X16) ? tx_size - TX_16X16 : 0;
  int v, token;
  int16_t dqv = dq[0];
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+  const int use_rect_quant = is_rect_quant_used(&xd->mi[0].src_mi->mbmi, type);
+#endif
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // CONFIG_NEW_QUANT
  const uint8_t *cat1_prob;
  const uint8_t *cat2_prob;
  const uint8_t *cat3_prob;
@ -125,6 +135,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
      INCREMENT_COUNT(EOB_MODEL_TOKEN);
      break;
    }
+#if CONFIG_NEW_QUANT
+    dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT

    while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
      INCREMENT_COUNT(ZERO_TOKEN);
@ -136,6 +149,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
      ctx = get_coef_context(nb, token_cache, c);
      band = *band_translate++;
      prob = coef_probs[band][ctx];
+#if CONFIG_NEW_QUANT
+      dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
    }

    if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
@ -191,7 +207,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
          break;
      }
    }
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+    if (use_rect_quant) {
+      v = (val * dqv) >> dq_shift;
+    } else {
+      v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+      v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+    }
+#else
+    v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+    v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
    v = (val * dqv) >> dq_shift;
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
    dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v);
 #else
@ -213,9 +244,15 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
  const int ctx = get_entropy_context(tx_size, pd->above_context + x,
                                               pd->left_context + y);
  const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
-  const int eob = decode_coefs(cm, xd, pd->plane_type,
-                               BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
-                               pd->dequant, ctx, so->scan, so->neighbors, r);
+  int eob;
+  eob = decode_coefs(cm, xd, pd->plane_type,
+                     BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
+                     pd->dequant,
+#if CONFIG_NEW_QUANT
+                     pd->dequant_val_nuq,
+#endif
+                     ctx, so->scan,
+                     so->neighbors, r);
 #if CONFIG_TX64X64
  if (plane > 0) assert(tx_size != TX_64X64);
 #endif
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@ -38,6 +38,10 @@ struct macroblock_plane {
  int16_t *quant_shift;
  int16_t *zbin;
  int16_t *round;
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq *dequant_val_nuq;
+  cumbins_type_nuq *cumbins_nuq;
+#endif

  int64_t quant_thred[2];
 };
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@ -29,6 +29,16 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#if CONFIG_NEW_QUANT
+void vp9_xform_quant_nuq(MACROBLOCK *x, int plane, int block,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#endif

 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);

--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@ -19,6 +19,13 @@ extern "C" {
 #endif

 typedef struct {
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, tran_low_t,
+                  y_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+  DECLARE_ALIGNED(16, tran_low_t,
+                  uv_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+#endif  // CONFIG_NEW_QUANT
+
  DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
  DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
  DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
@ -45,12 +52,75 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant_ptr,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                         int skip_block,
+                         const int16_t quant,
+                         const int16_t quant_shift,
+                         const int16_t dequant,
+                         const tran_low_t *cumbins_ptr,
+                         const tran_low_t *dequant_val,
+                         tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr,
+                         uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                            int skip_block,
+                            const int16_t quant,
+                            const int16_t dequant,
+                            const tran_low_t *cumbins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant_ptr,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
+
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                const int16_t *scan, const int16_t *iscan);

@ -67,6 +137,46 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                                int skip_block,
+                                const int16_t quant,
+                                const int16_t quant_shift,
+                                const int16_t dequant,
+                                const tran_low_t *cumbins_ptr,
+                                const tran_low_t *dequant_val,
+                                tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                                   int skip_block,
+                                   const int16_t quant,
+                                   const int16_t dequant,
+                                   const tran_low_t *cumbins_ptr,
+                                   const tran_low_t *dequant_val,
+                                   tran_low_t *qcoeff_ptr,
+                                   tran_low_t *dqcoeff_ptr,
+                                   uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_TX64X64
 void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                  int skip_block,
@ -76,6 +186,27 @@ void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_VP9_HIGHBITDEPTH

@ -101,7 +232,23 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                       const int16_t *dequant_ptr,
                       int logsizeby32, int stride, int has_dc);
-
+#if CONFIG_NEW_QUANT
+void vp9_quantize_rect_nuq(const tran_low_t *coeff_ptr,
+                           int row,
+                           int col,
+                           int stride,
+                           const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cumbins_type_nuq *cumbins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr,
+                           int logsizeby32,
+                           const int16_t *scan,
+                           const uint8_t *band);
+#endif  // CONFIG_NEW_QUANT
 int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan);
 #endif

--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@ -559,7 +559,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
      // full forward transform and quantization
-      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+      if (x->quant_fp)
+        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif
 #if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
@ -573,7 +583,14 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_dc_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_dc_nuq(x, plane, block, plane_bsize, tx_size);
+#else
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+#endif
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
@ -598,7 +615,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
    }
  } else {
    // full forward transform and quantization
-    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+    if (x->quant_fp)
+      vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+    if (x->quant_fp)
+      vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);