Adds various high bit-depth encode functions

Change-Id: I6f67b171022bbc8199c6d674190b57f6bab1b62f
Deb Mukherjee 2014-09-24 06:36:34 -07:00
parent 6989e81d61
commit 993d10a217
15 changed files with 1247 additions and 55 deletions
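Note: throughout this patch, 16-bit sample buffers travel through the existing uint8_t * interfaces via the CONVERT_TO_BYTEPTR / CONVERT_TO_SHORTPTR punning macros. A minimal sketch of that convention, assuming the definitions libvpx uses in its vp9 common headers:
/* Sketch only -- the real macros live in the vp9 common headers. */
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))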

@ -196,6 +196,64 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
if (eob > 0) {
TX_TYPE tx_type = DCT_DCT;
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (xd->lossless) {
tx_type = DCT_DCT;
vp9_high_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
const PLANE_TYPE plane_type = pd->plane_type;
switch (tx_size) {
case TX_4X4:
tx_type = get_tx_type_4x4(plane_type, xd, block);
vp9_high_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_8X8:
tx_type = get_tx_type(plane_type, xd);
vp9_high_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_16X16:
tx_type = get_tx_type(plane_type, xd);
vp9_high_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_32X32:
tx_type = DCT_DCT;
vp9_high_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
break;
default:
assert(0 && "Invalid transform size");
}
}
} else {
if (xd->lossless) {
tx_type = DCT_DCT;
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
const PLANE_TYPE plane_type = pd->plane_type;
switch (tx_size) {
case TX_4X4:
tx_type = get_tx_type_4x4(plane_type, xd, block);
vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_8X8:
tx_type = get_tx_type(plane_type, xd);
vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_16X16:
tx_type = get_tx_type(plane_type, xd);
vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_32X32:
tx_type = DCT_DCT;
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
break;
default:
assert(0 && "Invalid transform size");
return;
}
}
}
#else
if (xd->lossless) {
tx_type = DCT_DCT;
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
@ -220,8 +278,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
break;
default:
assert(0 && "Invalid transform size");
return;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (eob == 1) {
vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
@ -599,6 +659,9 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
#if CONFIG_VP9_HIGHBITDEPTH
xd->bd = (int)cm->bit_depth;
#endif
}
static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
@ -1139,8 +1202,17 @@ BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
static void read_bitdepth_colorspace_sampling(
VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
if (cm->profile >= PROFILE_2) {
cm->bit_depth = vp9_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = 1;
#endif
} else {
cm->bit_depth = VPX_BITS_8;
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = 0;
#endif
}
cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
if (cm->color_space != SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
@ -1244,6 +1316,10 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
// case (normative).
cm->color_space = BT_601;
cm->subsampling_y = cm->subsampling_x = 1;
cm->bit_depth = VPX_BITS_8;
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = 0;
#endif
}
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
@ -1284,6 +1360,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
#endif
if (pbi->need_resync) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,

@ -34,6 +34,9 @@ static int segment_id[MAX_SEGMENTS] = { 5, 3, 1, 0, 2, 4, 6, 7 };
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0};
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = {0};
#endif
unsigned int vp9_vaq_segment_id(int energy) {
ENERGY_IN_BOUNDS(energy);
@ -126,14 +129,40 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow;
int avg;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_variance(x->plane[0].src.buf, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, &sse,
&avg);
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
}
#else
variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros),
0, &sse);
} else {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
}
#else
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
return (256 * var) >> num_pels_log2_lookup[bs];
}
}
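Note: the shifts in block_variance() above bring high bit-depth statistics back to the 8-bit domain the AQ thresholds were tuned for; the sum of squared errors scales with the square of the sample range, while the average scales linearly. A hypothetical helper making the rule explicit:
/* Hypothetical helper, not part of the patch. */
static void normalize_to_8bit_domain(unsigned int *sse, int *avg, int bd) {
  *sse >>= 2 * (bd - 8);  /* squared-error term: range squared */
  *avg >>= bd - 8;        /* mean term: range to the first power */
}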

@ -120,16 +120,28 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w) {
}
static void pack_mb_tokens(vp9_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
TOKENEXTRA *p = *tp;
while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
const struct vp9_token *const a = &vp9_coef_encodings[t];
int i = 0;
int v = a->value;
int n = a->len;
#if CONFIG_VP9_HIGHBITDEPTH
const vp9_extra_bit *b;
if (bit_depth == VPX_BITS_12)
b = &vp9_extra_bits_high12[t];
else if (bit_depth == VPX_BITS_10)
b = &vp9_extra_bits_high10[t];
else
b = &vp9_extra_bits[t];
#else
const vp9_extra_bit *const b = &vp9_extra_bits[t];
(void) bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
/* skip one or two nodes */
if (p->skip_eob_node) {
@ -387,7 +399,7 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
}
assert(*tok < tok_end);
pack_mb_tokens(w, tok, tok_end, cm->bit_depth);
}
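Note: pack_mb_tokens() above now selects an extra-bits table per bit depth, because 10- and 12-bit coefficients span a wider range and need longer extra-bit strings for the large-value tokens. The selection idiom as a standalone sketch (the _high10/_high12 tables are assumed to be defined in vp9_tokenize.c, per the declarations at the end of this diff):
/* Sketch of the table selection added to pack_mb_tokens(). */
static const vp9_extra_bit *select_extra_bits(vpx_bit_depth_t bit_depth,
                                              int token) {
  if (bit_depth == VPX_BITS_12) return &vp9_extra_bits_high12[token];
  if (bit_depth == VPX_BITS_10) return &vp9_extra_bits_high10[token];
  return &vp9_extra_bits[token];  /* 8-bit default */
}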
static void write_partition(const VP9_COMMON *const cm,

@ -61,16 +61,51 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128
};
#if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128
};
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
};
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
};
#endif // CONFIG_VP9_HIGHBITDEPTH
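Note: the three tables above are the 8-bit neutral value 128 rescaled to each depth, i.e. 128 << (bd - 8): 512 (128*4) for 10-bit and 2048 (128*16) for 12-bit. A sanity sketch, not part of the patch:
/* Sanity sketch, not in the patch. */
static void check_var_offs(void) {
  assert(VP9_HIGH_VAR_OFFS_8[0] == 128);
  assert(VP9_HIGH_VAR_OFFS_10[0] == (128 << 2));  /* 512 */
  assert(VP9_HIGH_VAR_OFFS_12[0] == (128 << 4));  /* 2048 */
}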
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@ -80,6 +115,32 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
#if CONFIG_VP9_HIGHBITDEPTH
static unsigned int high_get_sby_perpixel_variance(
VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
unsigned int var, sse;
switch (bd) {
case 10:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10),
0, &sse);
break;
case 12:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12),
0, &sse);
break;
case 8:
default:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8),
0, &sse);
break;
}
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
int mi_row, int mi_col,
@ -419,6 +480,22 @@ static void choose_partitioning(VP9_COMP *cpi,
} else {
d = VP9_VAR_OFFS;
dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (xd->bd) {
case 10:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
break;
case 12:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
break;
case 8:
default:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
}
// Fill in the entire tree of 8x8 variances for splits.
@ -734,7 +811,17 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
// Set to zero to make sure we do not use the previous encoded frame stats
mbmi->skip = 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->source_variance =
high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd);
} else {
x->source_variance =
get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
}
#else
x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Save rdmult before it might be changed, so it can be restored later.
orig_rdmult = x->rdmult;
@ -3170,9 +3257,34 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
for (i = 0; i < cm->mb_rows; i++) {
for (j = 0; j < cm->mb_cols; j++) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case VPX_BITS_8:
vp9_high_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
case VPX_BITS_10:
vp9_high_10_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
case VPX_BITS_12:
vp9_high_12_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
default:
assert(0 && "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
" or VPX_BITS_12");
return -1;
}
} else {
vp9_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
}
#else
vp9_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
#endif // CONFIG_VP9_HIGHBITDEPTH
var16->var = var16->sse -
(((uint32_t)var16->sum * var16->sum) >> 8);
@ -3314,7 +3426,15 @@ static void encode_frame_internal(VP9_COMP *cpi) {
cm->tx_mode = select_tx_mode(cpi);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
x->fwd_txm4x4 = xd->lossless ? vp9_high_fwht4x4 : vp9_high_fdct4x4;
else
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
x->high_itxm_add = xd->lossless ? vp9_high_iwht4x4_add : vp9_high_idct4x4_add;
#else
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
#endif // CONFIG_VP9_HIGHBITDEPTH
x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
if (xd->lossless) {

@ -51,6 +51,29 @@ void vp9_subtract_block_c(int rows, int cols,
}
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_high_subtract_block_c(int rows, int cols,
int16_t *diff, ptrdiff_t diff_stride,
const uint8_t *src8, ptrdiff_t src_stride,
const uint8_t *pred8, ptrdiff_t pred_stride,
int bd) {
int r, c;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
(void) bd;
for (r = 0; r < rows; r++) {
for (c = 0; c < cols; c++) {
diff[c] = src[c] - pred[c];
}
diff += diff_stride;
pred += pred_stride;
src += src_stride;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@ -58,6 +81,13 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
#if CONFIG_VP9_HIGHBITDEPTH
if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_high_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, x->e_mbd.bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride);
}
@ -124,6 +154,8 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
int64_t rd_cost0, rd_cost1;
int rate0, rate1, error0, error1, t0, t1;
int best, band, pt, i, final_eob;
const TOKENVALUE *dct_value_tokens;
const int16_t *dct_value_cost;
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
@ -140,9 +172,24 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
tokens[eob][0].qc = 0;
tokens[eob][1] = tokens[eob][0];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->bd == 12) {
dct_value_tokens = vp9_dct_value_tokens_high12_ptr;
dct_value_cost = vp9_dct_value_cost_high12_ptr;
} else if (xd->bd == 10) {
dct_value_tokens = vp9_dct_value_tokens_high10_ptr;
dct_value_cost = vp9_dct_value_cost_high10_ptr;
} else {
dct_value_tokens = vp9_dct_value_tokens_ptr;
dct_value_cost = vp9_dct_value_cost_ptr;
}
#else
dct_value_tokens = vp9_dct_value_tokens_ptr;
dct_value_cost = vp9_dct_value_cost_ptr;
#endif
for (i = 0; i < eob; i++)
token_cache[scan[i]] =
vp9_pt_energy_class[dct_value_tokens[qcoeff[scan[i]]].token];
for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
@ -156,7 +203,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
/* Evaluate the first possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
t0 = (dct_value_tokens + x)->token;
/* Consider both possible successor states. */
if (next < default_eob) {
band = band_translate[i + 1];
@ -169,8 +216,13 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = dct_value_cost[x];
dx = mul * (dqcoeff[rc] - coeff[rc]);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
tokens[i][0].error = d2 + (best ? error1 : error0);
@ -203,7 +255,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
} else {
t0 = t1 = (dct_value_tokens + x)->token;
}
if (next < default_eob) {
band = band_translate[i + 1];
@ -222,10 +274,19 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = dct_value_cost[x];
if (shortcut) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
} else {
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
}
#else
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
#endif // CONFIG_VP9_HIGHBITDEPTH
d2 = dx * dx;
}
tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
@ -310,7 +371,7 @@ static INLINE void high_fdct32x32(int rd_transform, const int16_t *src,
else
vp9_high_fdct32x32(src, dst, src_stride);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
@ -328,6 +389,44 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_high_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, p->zbin_extra,
eob, scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vp9_high_fdct16x16(src_diff, coeff, diff_stride);
vp9_high_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_high_fdct8x8(src_diff, coeff, diff_stride);
vp9_high_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_high_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
@ -379,6 +478,40 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
vp9_high_fdct32x32_1(src_diff, coeff, diff_stride);
vp9_high_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_16X16:
vp9_high_fdct16x16_1(src_diff, coeff, diff_stride);
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_high_fdct8x8_1(src_diff, coeff, diff_stride);
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
default:
assert(0);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
vp9_fdct32x32_1(src_diff, coeff, diff_stride);
@ -426,6 +559,44 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vp9_high_fdct16x16(src_diff, coeff, diff_stride);
vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_high_fdct8x8(src_diff, coeff, diff_stride);
vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
@ -520,6 +691,34 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
if (x->skip_encode || p->eobs[block] == 0)
return;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
vp9_high_idct32x32_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
break;
case TX_16X16:
vp9_high_idct16x16_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
break;
case TX_8X8:
vp9_high_idct8x8_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->high_itxm_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
break;
default:
assert(0 && "Invalid transform size");
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
@ -557,8 +756,15 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->high_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
}
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
@ -622,6 +828,115 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
src = &p->src.buf[4 * (j * src_stride + i)];
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_high_subtract_block(32, 32, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob) {
vp9_high_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
}
break;
case TX_16X16:
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_16X16][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_high_subtract_block(16, 16, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vp9_high_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob) {
vp9_high_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
*eob, xd->bd);
}
break;
case TX_8X8:
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_8X8][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_high_subtract_block(8, 8, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vp9_high_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob) {
vp9_high_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
xd->bd);
}
break;
case TX_4X4:
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_high_subtract_block(4, 4, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
if (tx_type != DCT_DCT)
vp9_high_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around
// eob<=1 which is significant (not just an optimization) for the
// lossless case.
x->high_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
else
vp9_high_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
}
break;
default:
assert(0);
return;
}
if (*eob)
*(args->skip) = 0;
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];

@ -55,6 +55,52 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
}
}
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
uint8_t *dst8, int dst_pitch,
int w, int h,
int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int i, linesize;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// copy the left and right most columns out
const uint16_t *src_ptr1 = src;
const uint16_t *src_ptr2 = src + w - 1;
uint16_t *dst_ptr1 = dst - extend_left;
uint16_t *dst_ptr2 = dst + w;
for (i = 0; i < h; i++) {
vpx_memset16(dst_ptr1, src_ptr1[0], extend_left);
vpx_memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(uint16_t));
vpx_memset16(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_pitch;
src_ptr2 += src_pitch;
dst_ptr1 += dst_pitch;
dst_ptr2 += dst_pitch;
}
// Now copy the top and bottom lines into each line of the respective
// borders
src_ptr1 = dst - extend_left;
src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
dst_ptr2 = dst + dst_pitch * (h) - extend_left;
linesize = extend_left + extend_right + w;
for (i = 0; i < extend_top; i++) {
vpx_memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t));
dst_ptr1 += dst_pitch;
}
for (i = 0; i < extend_bottom; i++) {
vpx_memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t));
dst_ptr2 += dst_pitch;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst) {
// Extend src frame in buffer
@ -75,6 +121,26 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
const int eb_uv = eb_y >> uv_height_subsampling;
const int er_uv = er_y >> uv_width_subsampling;
#if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_width, src->y_height,
et_y, el_y, eb_y, er_y);
highbd_copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_width, src->uv_height,
et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_width, src->uv_height,
et_uv, el_uv, eb_uv, er_uv);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_width, src->y_height,

@ -281,6 +281,60 @@ static unsigned int get_prediction_error(BLOCK_SIZE bsize,
return sse;
}
#if CONFIG_VP9_HIGHBITDEPTH
static vp9_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
int bd) {
switch (bd) {
default:
switch (bsize) {
case BLOCK_8X8:
return vp9_high_mse8x8;
case BLOCK_16X8:
return vp9_high_mse16x8;
case BLOCK_8X16:
return vp9_high_mse8x16;
default:
return vp9_high_mse16x16;
}
break;
case 10:
switch (bsize) {
case BLOCK_8X8:
return vp9_high_10_mse8x8;
case BLOCK_16X8:
return vp9_high_10_mse16x8;
case BLOCK_8X16:
return vp9_high_10_mse8x16;
default:
return vp9_high_10_mse16x16;
}
break;
case 12:
switch (bsize) {
case BLOCK_8X8:
return vp9_high_12_mse8x8;
case BLOCK_16X8:
return vp9_high_12_mse16x8;
case BLOCK_8X16:
return vp9_high_12_mse8x16;
default:
return vp9_high_12_mse16x16;
}
break;
}
}
static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
const struct buf_2d *src,
const struct buf_2d *ref,
int bd) {
unsigned int sse;
const vp9_variance_fn_t fn = highbd_get_block_variance_fn(bsize, bd);
fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
return sse;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Refine the motion search range according to the frame dimension
// for first pass test.
static int get_search_range(const VP9_COMMON *cm) {
@ -311,6 +365,11 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// Override the default variance function to use MSE.
v_fn_ptr.vf = get_block_variance_fn(bsize);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Center the initial step/diamond search on best mv.
tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
@ -562,6 +621,24 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
vp9_encode_intra_block_plane(x, bsize, 0);
this_error = vp9_get_mb_ss(x->plane[0].src_diff);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case VPX_BITS_8:
break;
case VPX_BITS_10:
this_error >>= 4;
break;
case VPX_BITS_12:
this_error >>= 8;
break;
default:
assert(0 && "cm->bit_depth should be VPX_BITS_8, "
"VPX_BITS_10 or VPX_BITS_12");
return;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
@ -601,8 +678,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
} else {
motion_error = get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
}
#else
motion_error = get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Compute the motion error of the 0,0 motion using the last source
// frame as the reference. Skip the further motion search on
@ -611,8 +698,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
cpi->unscaled_last_source->y_buffer + recon_yoffset;
unscaled_last_source_buf_2d.stride =
cpi->unscaled_last_source->y_stride;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
raw_motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
} else {
raw_motion_error = get_prediction_error(
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d);
}
#else
raw_motion_error = get_prediction_error(
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d);
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(pengchong): Replace the hard-coded threshold
if (raw_motion_error > 25 || lc != NULL) {
@ -648,8 +745,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int gf_motion_error;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
gf_motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
} else {
gf_motion_error = get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
}
#else
gf_motion_error = get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
&gf_motion_error);
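Note: the first-pass error adjustment earlier in this file (this_error >>= 4 at 10 bits, >>= 8 at 12 bits) is the same range-squared normalization in closed form. A hypothetical equivalent, assuming VPX_BITS_8/10/12 carry the numeric values 8/10/12 as in vpx_bit_depth_t:
/* Hypothetical closed form of the bit-depth switch above, not in the patch. */
static int64_t normalize_first_pass_error(int64_t err, vpx_bit_depth_t bd) {
  return err >> (2 * (bd - VPX_BITS_8));
}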

@ -284,16 +284,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
int tc = bc; \
\
bestmv->row *= 8; \
bestmv->col *= 8;
int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
@ -309,6 +300,29 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
if (second_pred != NULL) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
sse1);
} else {
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
}
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
if (sad_list &&
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
@ -401,6 +415,29 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
if (second_pred != NULL) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
sse1);
} else {
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
}
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
(void) sad_list; // to silence compiler warning
// Each subsequent iteration checks at least one point in
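Note: for compound prediction, the averaging above now happens on 16-bit samples before the variance call. vp9_high_comp_avg_pred() itself is not shown in this diff; a plausible reference implementation inferred from the call sites (treat the details as an assumption):
/* Sketch of what vp9_high_comp_avg_pred() presumably does; pred8/ref8 are
 * CONVERT_TO_BYTEPTR-style handles to uint16_t samples. */
static void high_comp_avg_pred_sketch(uint16_t *comp_pred,
                                      const uint8_t *pred8,
                                      int width, int height,
                                      const uint8_t *ref8, int ref_stride) {
  int i, j;
  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j)
      comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + ref[j], 1);
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}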

@ -40,7 +40,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1,
partial_frame);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
} else {
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
}
#else
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Re-instate the unfiltered frame
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
@ -145,7 +153,26 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
const int q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
#if CONFIG_VP9_HIGHBITDEPTH
int filt_guess;
switch (cm->bit_depth) {
case VPX_BITS_8:
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
break;
case VPX_BITS_10:
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
break;
case VPX_BITS_12:
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif // CONFIG_VP9_HIGHBITDEPTH
if (cm->frame_type == KEY_FRAME)
filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
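Note: the three fixed-point forms above encode the same linear fit at each quantizer scale. vp9_ac_quant() returns values roughly 4x larger at 10 bits and 16x larger at 12 bits, so the q term keeps the multiplier 20723 while the shift grows by 2 (or 4) bits, and the intercept scales to match: 4060632 = 4 * 1015158 and 16242526 ~= 16 * 1015158. A worked check for the 10-bit case, assuming q10 == 4 * q8 exactly:
/* ROUND_POWER_OF_TWO(4*q8*20723 + 4060632, 20)
 *   == (4*q8*20723 + 4*1015158 + (1 << 19)) >> 20
 *   == (4 * (q8*20723 + 1015158 + (1 << 17))) >> 20
 *   == ROUND_POWER_OF_TWO(q8*20723 + 1015158, 18),
 * i.e. the same filt_guess as the 8-bit path. */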

@ -241,13 +241,44 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
dc_quant >> (xd->bd - 5), &rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_rate_sum = rate >> 1;
*out_dist_sum = dist << 3;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(var,
1 << num_pels_log2_lookup[bsize],
ac_quant >> (xd->bd - 5),
&rate,
&dist);
} else {
vp9_model_rd_from_var_lapndz(var,
1 << num_pels_log2_lookup[bsize],
ac_quant >> 3,
&rate,
&dist);
}
#else
vp9_model_rd_from_var_lapndz(var,
1 << num_pels_log2_lookup[bsize],
ac_quant >> 3,
&rate,
&dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_rate_sum += rate;
*out_dist_sum += dist << 4;
}
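Note: the xd->bd - 5 shift generalizes the 8-bit path, since bd == 8 gives dc_quant >> 3; the extra bd - 8 bits rescale the larger quantizer into the domain the Laplacian RD model was fitted for. The same idiom appears in model_rd_for_sb() in vp9_rdopt.c later in this diff.
/* bd == 8:  dc_quant >> 3
 * bd == 10: dc_quant >> 5 == (dc_quant >> 2) >> 3
 * bd == 12: dc_quant >> 7 == (dc_quant >> 4) >> 3 */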
@ -293,9 +324,17 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
// The encode_breakout input
const unsigned int min_thresh =
MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
const int shift = 2 * xd->bd - 16;
#endif
// Calculate threshold according to dequant value.
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
#if CONFIG_VP9_HIGHBITDEPTH
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
// Adjust ac threshold according to partition size.
@ -303,6 +342,11 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
8 - (b_width_log2(bsize) + b_height_log2(bsize));
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
thresh_ac = 0;
thresh_dc = 0;
@ -438,9 +482,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
intra_cost_penalty, 0);
const int intra_mode_cost = 50;
@ -461,14 +504,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
PRED_BUFFER tmp[4];
DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, pred_buf_16, 3 * 64 * 64);
#endif
struct buf_2d orig_dst = pd->dst;
PRED_BUFFER *best_pred = NULL;
PRED_BUFFER *this_mode_pred = NULL;
const int pixels_in_block = bh * bw;
if (cpi->sf.reuse_inter_pred_sby) {
int i;
for (i = 0; i < 3; i++) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
else
tmp[i].data = &pred_buf[pixels_in_block * i];
#else
tmp[i].data = &pred_buf[pixels_in_block * i];
#endif // CONFIG_VP9_HIGHBITDEPTH
tmp[i].stride = bw;
tmp[i].in_use = 0;
}
@ -703,8 +757,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
best_pred->data != orig_dst.buf) {
pd->dst = orig_dst;
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
vp9_high_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride,
NULL, 0, NULL, 0, bw, bh, xd->bd);
} else {
vp9_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride,
NULL, 0, NULL, 0, bw, bh);
}
#else
vp9_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride, NULL, 0,
NULL, 0, bw, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
mbmi->mode = best_mode;

@ -155,7 +155,7 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
}
#else
int rdmult = 88 * q * q / 24;
#endif // CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
@ -187,7 +187,7 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
#else
(void) bit_depth;
q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
@ -213,7 +213,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
#else
cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
#endif // CONFIG_VP9_HIGHBITDEPTH
}
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
@ -598,3 +598,24 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
if (sf->disable_split_mask & (1 << i))
rd->thresh_mult_sub8x8[i] = INT_MAX;
}
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
vpx_bit_depth_t bit_depth) {
const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
case VPX_BITS_8:
return 20 * q;
case VPX_BITS_10:
return 5 * q;
case VPX_BITS_12:
return ROUND_POWER_OF_TWO(5 * q, 2);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
#else
return 20 * q;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
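Note: vp9_get_intra_cost_penalty() keeps the penalty roughly constant in 8-bit units. Assuming vp9_dc_quant() scales by 1 << (bd - 8), all three branches reduce to about 20 * q8:
/* VPX_BITS_8:  20 * q8
 * VPX_BITS_10: 5 * q10 == 5 * (4 * q8) == 20 * q8
 * VPX_BITS_12: ROUND_POWER_OF_TWO(5 * q12, 2) == (5 * 16 * q8) >> 2 == 20 * q8 */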

@ -162,6 +162,10 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
int mi_row, int mi_col,
const struct scale_factors *scale,
const struct scale_factors *scale_uv);
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
vpx_bit_depth_t bit_depth);
#ifdef __cplusplus
} // extern "C"
#endif

@ -228,9 +228,13 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Fast approximate the modelling function.
if (cpi->oxcf.speed > 4) {
int64_t rate;
int64_t dist;
int64_t square_error = sse;
int quantizer = (pd->dequant[1] >> 3);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
quantizer >>= (xd->bd - 8);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8;
@ -240,8 +244,19 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
rate_sum += rate;
dist_sum += dist;
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> (xd->bd - 5),
&rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
rate_sum += rate;
dist_sum += dist;
}
@ -266,6 +281,31 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
return error;
}
#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bd) {
int i;
int64_t error = 0, sqcoeff = 0;
int shift = 2 * (bd - 8);
int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i++) {
const int64_t diff = coeff[i] - dqcoeff[i];
error += diff * diff;
sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
}
assert(error >= 0 && sqcoeff >= 0);
error = (error + rounding) >> shift;
sqcoeff = (sqcoeff + rounding) >> shift;
*ssz = sqcoeff;
return error;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
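Note: vp9_high_block_error_c() rescales both the error and the squared-coefficient sum by 2 * (bd - 8) with symmetric rounding, keeping RD distortion in 8-bit units.
/* bd == 8 => shift == 0, rounding == 0: identical to vp9_block_error_c(). */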
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
@ -351,8 +391,14 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
return cost;
}
#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
struct rdcost_block_args* args, int bd) {
#else
static void dist_block(int plane, int block, TX_SIZE tx_size,
struct rdcost_block_args* args) {
#endif // CONFIG_VP9_HIGHBITDEPTH
const int ss_txfrm_size = tx_size << 1;
MACROBLOCK* const x = args->x;
MACROBLOCKD* const xd = &x->e_mbd;
@ -362,14 +408,24 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
int shift = tx_size == TX_32X32 ? 0 : 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse, bd) >> shift;
#else
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
#endif // CONFIG_VP9_HIGHBITDEPTH
args->sse = this_sse >> shift;
if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> (shift + 2);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
p >>= ((xd->bd - 8) * 2);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
args->dist += (p >> 4);
args->sse += p;
}
@ -399,12 +455,28 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
if (!is_inter_block(mbmi)) {
vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dist_block(plane, block, tx_size, args, xd->bd);
} else {
dist_block(plane, block, tx_size, args, 8);
}
#else
dist_block(plane, block, tx_size, args);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dist_block(plane, block, tx_size, args, xd->bd);
} else {
dist_block(plane, block, tx_size, args, 8);
}
#else
dist_block(plane, block, tx_size, args);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
@ -424,7 +496,15 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
} else {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dist_block(plane, block, tx_size, args, xd->bd);
} else {
dist_block(plane, block, tx_size, args, 8);
}
#else
dist_block(plane, block, tx_size, args);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
rate_block(plane, block, plane_bsize, tx_size, args);
@ -659,6 +739,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
#endif
assert(ib < 4);
@ -666,6 +749,108 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
vpx_memcpy(tl, l, sizeof(tl));
xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
int64_t distortion = 0;
int rate = bmode_costs[mode];
if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
continue;
// Only do the oblique modes if the best so far is
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
continue;
}
vpx_memcpy(tempa, ta, sizeof(ta));
vpx_memcpy(templ, tl, sizeof(tl));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
const int block = ib + idy * 2 + idx;
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0].src_mi->bmi[block].as_mode = mode;
vp9_predict_intra_block(xd, block, 1,
TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, idx, idy, 0);
vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
dst, dst_stride, xd->bd);
if (xd->lossless) {
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
vp9_high_fwht4x4(src_diff, coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
dst, dst_stride,
p->eobs[block], xd->bd);
} else {
int64_t unused;
const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
distortion += vp9_high_block_error(coeff,
BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused, xd->bd) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
dst, dst_stride, p->eobs[block], xd->bd);
}
}
}
rate += ratey;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
*bestratey = ratey;
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
vpx_memcpy(a, tempa, sizeof(tempa));
vpx_memcpy(l, templ, sizeof(templ));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
vpx_memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
}
}
next_highbd:
{}
}
if (best_rd >= rd_thresh || x->skip_encode)
return best_rd;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
best_dst16 + idy * 8,
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
}
return best_rd;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
@ -1118,6 +1303,16 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_high_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
} else {
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
@ -1126,11 +1321,32 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2));
}
#else
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_high_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride, xd->bd);
} else {
vp9_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
}
#else
vp9_subtract_block(height, width,
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_VP9_HIGHBITDEPTH
k = i;
for (idy = 0; idy < height / 4; ++idy) {
@ -1143,8 +1359,19 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
thisdistortion += vp9_high_block_error(coeff,
BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz, xd->bd);
} else {
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
}
#else
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
#endif // CONFIG_VP9_HIGHBITDEPTH
thissse += ssz;
thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
so->scan, so->neighbors,
@ -1901,7 +2128,12 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int_mv ref_mv[2];
int ite, ref;
// Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
uint8_t *second_pred;
uint8_t *second_pred_alloc;
#else
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
#endif // CONFIG_VP9_HIGHBITDEPTH
const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
// Do joint motion search in compound mode to get more accurate mv.
@ -1912,6 +2144,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
};
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);
} else {
second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));
second_pred = second_pred_alloc;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
for (ref = 0; ref < 2; ++ref) {
ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
@ -1950,6 +2191,28 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
ref_yv12[1] = xd->plane[0].pre[1];
// Get pred block from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_high_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, pw,
&frame_mv[refs[!id]].as_mv,
&xd->block_refs[!id]->sf,
pw, ph, 0,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE,
xd->bd);
} else {
vp9_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, pw,
&frame_mv[refs[!id]].as_mv,
&xd->block_refs[!id]->sf,
pw, ph, 0,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
}
#else
vp9_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, pw,
@ -1958,6 +2221,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
pw, ph, 0,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Compound motion search on first ref frame.
if (id)
@ -2026,7 +2290,11 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
#if CONFIG_VP9_HIGHBITDEPTH
vpx_free(second_pred_alloc);
#else
vpx_free(second_pred);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
static INLINE void restore_dst_buf(MACROBLOCKD *xd,
@ -2068,12 +2336,26 @@ static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
// Calculate threshold according to dequant value.
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
const int shift = 2 * xd->bd - 16;
if (shift > 0)
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
// Adjust threshold according to partition size.
thresh_ac >>= 8 - (b_width_log2(bsize) +
b_height_log2(bsize));
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
const int shift = 2 * xd->bd - 16;
if (shift > 0)
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
thresh_ac = 0;
thresh_dc = 0;
@ -2145,7 +2427,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
uint8_t *tmp_buf = tmp_buf8;
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
#endif // CONFIG_VP9_HIGHBITDEPTH
int pred_exists = 0;
int intpel_mv;
int64_t rd, tmp_rd, best_rd = INT64_MAX;
@ -2162,6 +2450,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
} else {
tmp_buf = tmp_buf8;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (pred_filter_search) {
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
if (xd->up_available)
@ -2575,8 +2871,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t dist_uv[TX_SIZES];
int skip_uv[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int best_skip2 = 0;
uint8_t ref_frame_skip_mask[2] = { 0 };
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
@ -3011,9 +3307,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// based on qp, activity mask and history
if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(mode_index > MIN_EARLY_TERM_INDEX)) {
int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
qstep >>= (xd->bd - 8);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16);
scale -= var_adjust;
@ -3329,8 +3630,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int64_t dist_uv;
int skip_uv;
PREDICTION_MODE mode_uv = DC_PRED;
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
@ -3748,9 +4049,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// based on qp, activity mask and history
if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(ref_index > MIN_EARLY_TERM_INDEX)) {
int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
qstep >>= (xd->bd - 8);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16);
scale -= var_adjust;

@ -54,7 +54,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
#ifdef __cplusplus
} // extern "C"
#endif

@ -53,6 +53,12 @@ extern const int16_t *vp9_dct_value_cost_ptr;
* fields are not.
*/
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
#if CONFIG_VP9_HIGHBITDEPTH
extern const int16_t *vp9_dct_value_cost_high10_ptr;
extern const TOKENVALUE *vp9_dct_value_tokens_high10_ptr;
extern const int16_t *vp9_dct_value_cost_high12_ptr;
extern const TOKENVALUE *vp9_dct_value_tokens_high12_ptr;
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"