Complete high bitdepth VAR_TX implementation.

VAR_TX now works in the high bitdepth configuration.

Change-Id: I4114d7d9ed59c598f1e4d35b8e75876c07074ba7
This commit is contained in:
		@@ -652,13 +652,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
 | 
				
			|||||||
    return;
 | 
					    return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (!is_inter_block(mbmi)) {
 | 
					  if (!is_inter_block(mbmi)) {
 | 
				
			||||||
#if CONFIG_VAR_TX
 | 
					 | 
				
			||||||
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
 | 
					    struct encode_b_args arg = {x, NULL, &mbmi->skip};
 | 
				
			||||||
#if CONFIG_VP9_HIGHBITDEPTH
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
 | 
					 | 
				
			||||||
                            plane_bsize, tx_size, &arg);
 | 
					 | 
				
			||||||
    dist_block(x, plane, block, tx_size, &dist, &sse);
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
    uint8_t *dst, *src;
 | 
					    uint8_t *dst, *src;
 | 
				
			||||||
    int src_stride = x->plane[plane].src.stride;
 | 
					    int src_stride = x->plane[plane].src.stride;
 | 
				
			||||||
    int dst_stride = xd->plane[plane].dst.stride;
 | 
					    int dst_stride = xd->plane[plane].dst.stride;
 | 
				
			||||||
@@ -680,9 +675,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
 | 
				
			|||||||
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
 | 
					    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
 | 
				
			||||||
                                                   dst, dst_stride, &tmp_sse);
 | 
					                                                   dst, dst_stride, &tmp_sse);
 | 
				
			||||||
    dist = (int64_t)tmp_sse * 16;
 | 
					    dist = (int64_t)tmp_sse * 16;
 | 
				
			||||||
#endif  // CONFIG_VP9_HIGHBITDEPTH
 | 
					 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
 | 
					 | 
				
			||||||
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
 | 
					    vp10_encode_block_intra(plane, block, blk_row, blk_col,
 | 
				
			||||||
                            plane_bsize, tx_size, &arg);
 | 
					                            plane_bsize, tx_size, &arg);
 | 
				
			||||||
    dist_block(x, plane, block, tx_size, &dist, &sse);
 | 
					    dist_block(x, plane, block, tx_size, &dist, &sse);
 | 
				
			||||||
@@ -1487,18 +1480,20 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
 | 
				
			|||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
            const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
					            const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
				
			||||||
                                                           *(templ + idy));
 | 
					                                                           *(templ + idy));
 | 
				
			||||||
#endif
 | 
					#endif  // CONFIG_VAR_TX
 | 
				
			||||||
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
 | 
					            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
 | 
				
			||||||
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
					            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
				
			||||||
            ratey += cost_coeffs(x, 0, block,
 | 
					 | 
				
			||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
                                 coeff_ctx,
 | 
					            ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
 | 
				
			||||||
 | 
					                                 so->neighbors, cpi->sf.use_fast_coef_costing);
 | 
				
			||||||
 | 
					            *(tempa + idx) = !(p->eobs[block] == 0);
 | 
				
			||||||
 | 
					            *(templ + idy) = !(p->eobs[block] == 0);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
                                 tempa + idx, templ + idy,
 | 
					            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
                                 TX_4X4,
 | 
					                                 TX_4X4,
 | 
				
			||||||
                                 so->scan, so->neighbors,
 | 
					                                 so->scan, so->neighbors,
 | 
				
			||||||
                                 cpi->sf.use_fast_coef_costing);
 | 
					                                 cpi->sf.use_fast_coef_costing);
 | 
				
			||||||
 | 
					#endif  // CONFIG_VAR_TX
 | 
				
			||||||
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
 | 
					            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
 | 
				
			||||||
              goto next_highbd;
 | 
					              goto next_highbd;
 | 
				
			||||||
            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
 | 
					            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
 | 
				
			||||||
@@ -1511,18 +1506,19 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
 | 
				
			|||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
            const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
					            const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
				
			||||||
                                                           *(templ + idy));
 | 
					                                                           *(templ + idy));
 | 
				
			||||||
#endif
 | 
					#endif  // CONFIG_VAR_TX
 | 
				
			||||||
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
 | 
					            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
 | 
				
			||||||
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
					            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
				
			||||||
            ratey += cost_coeffs(x, 0, block,
 | 
					 | 
				
			||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
                                 coeff_ctx,
 | 
					            ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
 | 
				
			||||||
 | 
					                                 so->neighbors, cpi->sf.use_fast_coef_costing);
 | 
				
			||||||
 | 
					            *(tempa + idx) = !(p->eobs[block] == 0);
 | 
				
			||||||
 | 
					            *(templ + idy) = !(p->eobs[block] == 0);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
                                 tempa + idx, templ + idy,
 | 
					            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
 | 
				
			||||||
#endif
 | 
					                                 TX_4X4, so->scan, so->neighbors,
 | 
				
			||||||
                                 TX_4X4,
 | 
					 | 
				
			||||||
                                 so->scan, so->neighbors,
 | 
					 | 
				
			||||||
                                 cpi->sf.use_fast_coef_costing);
 | 
					                                 cpi->sf.use_fast_coef_costing);
 | 
				
			||||||
 | 
					#endif  // CONFIG_VAR_TX
 | 
				
			||||||
            distortion += vp10_highbd_block_error(
 | 
					            distortion += vp10_highbd_block_error(
 | 
				
			||||||
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
 | 
					                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
 | 
				
			||||||
                16, &unused, xd->bd) >> 2;
 | 
					                16, &unused, xd->bd) >> 2;
 | 
				
			||||||
@@ -1555,6 +1551,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
 | 
				
			|||||||
next_highbd:
 | 
					next_highbd:
 | 
				
			||||||
      {}
 | 
					      {}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (best_rd >= rd_thresh)
 | 
					    if (best_rd >= rd_thresh)
 | 
				
			||||||
      return best_rd;
 | 
					      return best_rd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1604,8 +1601,8 @@ next_highbd:
 | 
				
			|||||||
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
 | 
					          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
 | 
				
			||||||
          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
 | 
					          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
 | 
				
			||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
          int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
					          const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
				
			||||||
                                                   *(templ + idy));
 | 
					                                                         *(templ + idy));
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
 | 
					          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
 | 
				
			||||||
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
					          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
				
			||||||
@@ -1629,8 +1626,8 @@ next_highbd:
 | 
				
			|||||||
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
 | 
					          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
 | 
				
			||||||
          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
 | 
					          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
 | 
				
			||||||
#if CONFIG_VAR_TX
 | 
					#if CONFIG_VAR_TX
 | 
				
			||||||
          int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
					          const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
 | 
				
			||||||
                                                   *(templ + idy));
 | 
					                                                         *(templ + idy));
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
 | 
					          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
 | 
				
			||||||
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
					          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 | 
				
			||||||
@@ -2321,12 +2318,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
 | 
				
			|||||||
  MACROBLOCKD *xd = &x->e_mbd;
 | 
					  MACROBLOCKD *xd = &x->e_mbd;
 | 
				
			||||||
  const struct macroblock_plane *const p = &x->plane[plane];
 | 
					  const struct macroblock_plane *const p = &x->plane[plane];
 | 
				
			||||||
  struct macroblockd_plane *const pd = &xd->plane[plane];
 | 
					  struct macroblockd_plane *const pd = &xd->plane[plane];
 | 
				
			||||||
#if CONFIG_VP9_HIGHBITDEPTH
 | 
					 | 
				
			||||||
  const int ss_txfrm_size = tx_size << 1;
 | 
					 | 
				
			||||||
  int64_t this_sse;
 | 
					 | 
				
			||||||
  int shift = tx_size == TX_32X32 ? 0 : 2;
 | 
					 | 
				
			||||||
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  unsigned int tmp_sse = 0;
 | 
					  unsigned int tmp_sse = 0;
 | 
				
			||||||
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 | 
					  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 | 
				
			||||||
  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
 | 
					  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
 | 
				
			||||||
@@ -2391,35 +2382,59 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
 | 
				
			|||||||
    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
 | 
					    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if CONFIG_VP9_HIGHBITDEPTH
 | 
					 | 
				
			||||||
  *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
 | 
					 | 
				
			||||||
                                   &this_sse, xd->bd) >> shift;
 | 
					 | 
				
			||||||
  *bsse += this_sse >> shift;
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
  *bsse += (int64_t)tmp_sse * 16;
 | 
					  *bsse += (int64_t)tmp_sse * 16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (p->eobs[block] > 0) {
 | 
					  if (p->eobs[block] > 0) {
 | 
				
			||||||
    switch (tx_size) {
 | 
					    const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
 | 
				
			||||||
      case TX_32X32:
 | 
					#if CONFIG_VP9_HIGHBITDEPTH
 | 
				
			||||||
        vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
					    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 | 
				
			||||||
 | 
					      const int bd = xd->bd;
 | 
				
			||||||
 | 
					      switch (tx_size) {
 | 
				
			||||||
 | 
					        case TX_32X32:
 | 
				
			||||||
 | 
					          vp10_highbd_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32,
 | 
				
			||||||
 | 
					                                         p->eobs[block], bd, tx_type);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case TX_16X16:
 | 
				
			||||||
 | 
					          vp10_highbd_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32,
 | 
				
			||||||
 | 
					                                         p->eobs[block], bd, tx_type);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case TX_8X8:
 | 
				
			||||||
 | 
					          vp10_highbd_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32,
 | 
				
			||||||
 | 
					                                       p->eobs[block], bd, tx_type);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case TX_4X4:
 | 
				
			||||||
 | 
					          vp10_highbd_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32,
 | 
				
			||||||
 | 
					                                       p->eobs[block], bd, tx_type, lossless);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        default:
 | 
				
			||||||
 | 
					          assert(0 && "Invalid transform size");
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					#endif  // CONFIG_VP9_HIGHBITDEPTH
 | 
				
			||||||
 | 
					      switch (tx_size) {
 | 
				
			||||||
 | 
					        case TX_32X32:
 | 
				
			||||||
 | 
					          vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
				
			||||||
 | 
					                                  tx_type);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case TX_16X16:
 | 
				
			||||||
 | 
					          vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
				
			||||||
 | 
					                                  tx_type);
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case TX_8X8:
 | 
				
			||||||
 | 
					          vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
				
			||||||
                                tx_type);
 | 
					                                tx_type);
 | 
				
			||||||
        break;
 | 
					          break;
 | 
				
			||||||
      case TX_16X16:
 | 
					        case TX_4X4:
 | 
				
			||||||
        vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
					          vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
				
			||||||
                                tx_type);
 | 
					                                tx_type, lossless);
 | 
				
			||||||
        break;
 | 
					          break;
 | 
				
			||||||
      case TX_8X8:
 | 
					        default:
 | 
				
			||||||
        vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
					          assert(0 && "Invalid transform size");
 | 
				
			||||||
                              tx_type);
 | 
					          break;
 | 
				
			||||||
        break;
 | 
					      }
 | 
				
			||||||
      case TX_4X4:
 | 
					 | 
				
			||||||
        vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
 | 
					 | 
				
			||||||
                              tx_type,
 | 
					 | 
				
			||||||
                              xd->lossless[xd->mi[0]->mbmi.segment_id]);
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
      default:
 | 
					 | 
				
			||||||
        assert(0 && "Invalid transform size");
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if ((bh >> 2) + blk_col > max_blocks_wide ||
 | 
					    if ((bh >> 2) + blk_col > max_blocks_wide ||
 | 
				
			||||||
@@ -2444,7 +2459,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  *dist += (int64_t)tmp_sse * 16;
 | 
					  *dist += (int64_t)tmp_sse * 16;
 | 
				
			||||||
#endif  // CONFIG_VP9_HIGHBITDEPTH
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
 | 
					  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
 | 
				
			||||||
                       scan_order->scan, scan_order->neighbors, 0);
 | 
					                       scan_order->scan, scan_order->neighbors, 0);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user