Dual buffer encoding for intra modes

Overall change (using dual buffer scheme for superblocks of both inter
and intra modes) reduces speed 2 runtime:
bluesky_1080p at 6000kbps:   263553ms -> 257441ms
riverbed_1080p at 8000kbps:  233230ms -> 225308ms.

Change-Id: Idf8d70f768a4b0d97b2a8506372c57b7b4022119
This commit is contained in:
Jingning Han 2013-11-07 14:56:58 -08:00
parent 8ce0967df8
commit b6b9143218
6 changed files with 124 additions and 72 deletions

View File

@ -27,16 +27,16 @@ typedef struct {
typedef struct {
MODE_INFO mic;
uint8_t *zcoeff_blk;
int16_t *coeff[MAX_MB_PLANE][2];
int16_t *qcoeff[MAX_MB_PLANE][2];
int16_t *dqcoeff[MAX_MB_PLANE][2];
uint16_t *eobs[MAX_MB_PLANE][2];
int16_t *coeff[MAX_MB_PLANE][3];
int16_t *qcoeff[MAX_MB_PLANE][3];
int16_t *dqcoeff[MAX_MB_PLANE][3];
uint16_t *eobs[MAX_MB_PLANE][3];
// dual buffer pointers, 0: in use, 1: best in store
int16_t *coeff_pbuf[MAX_MB_PLANE][2];
int16_t *qcoeff_pbuf[MAX_MB_PLANE][2];
int16_t *dqcoeff_pbuf[MAX_MB_PLANE][2];
uint16_t *eobs_pbuf[MAX_MB_PLANE][2];
int16_t *coeff_pbuf[MAX_MB_PLANE][3];
int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
int is_coded;
int num_4x4_blk;
@ -94,6 +94,7 @@ struct macroblock {
MACROBLOCKD e_mbd;
int skip_block;
int select_txfm_size;
int skip_recode;
int skip_optimize;
int q_index;

View File

@ -377,6 +377,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
int max_plane;
assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
@ -385,13 +386,21 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
*mi_addr = *mi;
for (i = 0; i < MAX_MB_PLANE; ++i) {
max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1];
}
for (i = max_plane; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
pd[i].eobs = ctx->eobs_pbuf[i][2];
}
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@ -619,6 +628,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
pd[i].eobs = ctx->eobs_pbuf[i][0];
}
ctx->is_coded = 0;
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
xd->mi_8x8[0]->mbmi.skip_coeff = 0;
@ -2406,6 +2416,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;

View File

@ -432,19 +432,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
int i, k;
int i, j;
pd->eobs[block] = 0;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k);
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
ctx->ta[plane][i] = 0;
ctx->tl[plane][k] = 0;
ctx->tl[plane][j] = 0;
return;
}
if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8)
if (!x->skip_recode)
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize && (x->select_txfm_size ||
xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8|| !x->skip_optimize)) {
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
int i, k;
@ -515,10 +514,10 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
struct optimize_ctx ctx;
struct encode_b_args arg = {x, &ctx};
if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8)
if (!x->skip_recode)
vp9_subtract_sb(x, bsize);
if (x->optimize) {
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
optimize_init_b(i, bsize, &arg);
@ -563,19 +562,22 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (x->use_lp32x32fdct)
vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
else
vp9_fdct32x32(src_diff, coeff, bw * 4);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_recode) {
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (x->use_lp32x32fdct)
vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
else
vp9_fdct32x32(src_diff, coeff, bw * 4);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
break;
@ -588,16 +590,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(16, 16, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_recode) {
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_subtract_block(16, 16, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob)
vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@ -610,16 +614,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 8 * (block & twmask);
yoff = 8 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(8, 8, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_recode) {
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_subtract_block(8, 8, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@ -635,19 +641,23 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(4, 4, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_recode) {
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_subtract_block(4, 4, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1

View File

@ -535,6 +535,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1];
}
x->skip_recode = 0;
// Initialise the MV cost table to the defaults

View File

@ -1452,7 +1452,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 2; ++k) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
@ -1474,7 +1474,7 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 2; ++k) {
for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
vpx_free(ctx->qcoeff[i][k]);

View File

@ -246,7 +246,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_set_speed_features(cpi);
cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ?
cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ?
0 : 1;
set_block_thresholds(cpi);
@ -1329,6 +1330,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
@ -1364,6 +1366,27 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
pd[i].eobs = ctx->eobs_pbuf[i][2];
ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
ctx->coeff_pbuf[i][0] = p[i].coeff;
ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
ctx->eobs_pbuf[i][0] = pd[i].eobs;
}
}
}
}
@ -1389,8 +1412,9 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd;
}
static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
int *rate_uv, int *rate_uv_tokenonly,
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE bsize, int *rate_uv,
int *rate_uv_tokenonly,
int64_t *dist_uv, int *skip_uv,
MB_PREDICTION_MODE *mode_uv) {
MACROBLOCK *const x = &cpi->mb;
@ -1403,7 +1427,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
rd_pick_intra_sbuv_mode(cpi, x,
rd_pick_intra_sbuv_mode(cpi, x, ctx,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
}
@ -3033,12 +3057,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd; // if 0, this will be re-calculated by caller
}
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
int i;
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int max_plane) {
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane;
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
@ -3075,7 +3100,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX;
return;
}
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize);
} else {
y_skip = 0;
@ -3084,7 +3109,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX;
return;
}
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, BLOCK_8X8);
}
@ -3450,7 +3475,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx],
&mode_uv[uv_tx]);
@ -3584,6 +3609,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
int max_plane = MAX_MB_PLANE;
if (!mode_excluded) {
// Note index of best mode so far
best_mode_index = mode_index;
@ -3591,6 +3617,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
max_plane = 1;
}
*returnrate = rate2;
@ -3599,7 +3626,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi;
best_skip2 = this_skip2;
if (!x->select_txfm_size)
swap_block_ptr(x, ctx);
swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@ -3706,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],
@ -4075,7 +4102,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_y;
if (rate_uv_intra[TX_4X4] == INT_MAX) {
choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
&rate_uv_tokenonly[TX_4X4],
&dist_uv[TX_4X4], &skip_uv[TX_4X4],
&mode_uv[TX_4X4]);
@ -4329,12 +4356,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
if (!mode_excluded) {
int max_plane = MAX_MB_PLANE;
// Note index of best mode so far
best_mode_index = mode_index;
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
max_plane = 1;
}
*returnrate = rate2;
@ -4345,7 +4374,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi;
best_skip2 = this_skip2;
if (!x->select_txfm_size)
swap_block_ptr(x, ctx);
swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@ -4452,7 +4481,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],