Adds various high bit-depth encode functions
Change-Id: I6f67b171022bbc8199c6d674190b57f6bab1b62f
This commit is contained in:
parent
6989e81d61
commit
993d10a217
@ -196,6 +196,64 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
|
||||
if (eob > 0) {
|
||||
TX_TYPE tx_type = DCT_DCT;
|
||||
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
if (xd->lossless) {
|
||||
tx_type = DCT_DCT;
|
||||
vp9_high_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
|
||||
} else {
|
||||
const PLANE_TYPE plane_type = pd->plane_type;
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
tx_type = get_tx_type_4x4(plane_type, xd, block);
|
||||
vp9_high_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
case TX_8X8:
|
||||
tx_type = get_tx_type(plane_type, xd);
|
||||
vp9_high_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
case TX_16X16:
|
||||
tx_type = get_tx_type(plane_type, xd);
|
||||
vp9_high_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
case TX_32X32:
|
||||
tx_type = DCT_DCT;
|
||||
vp9_high_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (xd->lossless) {
|
||||
tx_type = DCT_DCT;
|
||||
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
|
||||
} else {
|
||||
const PLANE_TYPE plane_type = pd->plane_type;
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
tx_type = get_tx_type_4x4(plane_type, xd, block);
|
||||
vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
case TX_8X8:
|
||||
tx_type = get_tx_type(plane_type, xd);
|
||||
vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
case TX_16X16:
|
||||
tx_type = get_tx_type(plane_type, xd);
|
||||
vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
case TX_32X32:
|
||||
tx_type = DCT_DCT;
|
||||
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (xd->lossless) {
|
||||
tx_type = DCT_DCT;
|
||||
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
|
||||
@ -220,8 +278,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (eob == 1) {
|
||||
vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
|
||||
@ -599,6 +659,9 @@ static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
cm->y_dc_delta_q == 0 &&
|
||||
cm->uv_dc_delta_q == 0 &&
|
||||
cm->uv_ac_delta_q == 0;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
xd->bd = (int)cm->bit_depth;
|
||||
#endif
|
||||
}
|
||||
|
||||
static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
|
||||
@ -1139,8 +1202,17 @@ BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
|
||||
|
||||
static void read_bitdepth_colorspace_sampling(
|
||||
VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
|
||||
if (cm->profile >= PROFILE_2)
|
||||
if (cm->profile >= PROFILE_2) {
|
||||
cm->bit_depth = vp9_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth = 1;
|
||||
#endif
|
||||
} else {
|
||||
cm->bit_depth = VPX_BITS_8;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth = 0;
|
||||
#endif
|
||||
}
|
||||
cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
|
||||
if (cm->color_space != SRGB) {
|
||||
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
|
||||
@ -1244,6 +1316,10 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
|
||||
// case (normative).
|
||||
cm->color_space = BT_601;
|
||||
cm->subsampling_y = cm->subsampling_x = 1;
|
||||
cm->bit_depth = VPX_BITS_8;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
|
||||
@ -1284,6 +1360,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
|
||||
}
|
||||
}
|
||||
}
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
|
||||
#endif
|
||||
|
||||
if (pbi->need_resync) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
|
@ -34,6 +34,9 @@ static int segment_id[MAX_SEGMENTS] = { 5, 3, 1, 0, 2, 4, 6, 7 };
|
||||
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
|
||||
|
||||
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0};
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = {0};
|
||||
#endif
|
||||
|
||||
unsigned int vp9_vaq_segment_id(int energy) {
|
||||
ENERGY_IN_BOUNDS(energy);
|
||||
@ -126,14 +129,40 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
|
||||
const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow;
|
||||
int avg;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
high_variance(x->plane[0].src.buf, x->plane[0].src.stride,
|
||||
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, &sse,
|
||||
&avg);
|
||||
sse >>= 2 * (xd->bd - 8);
|
||||
avg >>= (xd->bd - 8);
|
||||
} else {
|
||||
variance(x->plane[0].src.buf, x->plane[0].src.stride,
|
||||
vp9_64_zeros, 0, bw, bh, &sse, &avg);
|
||||
}
|
||||
#else
|
||||
variance(x->plane[0].src.buf, x->plane[0].src.stride,
|
||||
vp9_64_zeros, 0, bw, bh, &sse, &avg);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
var = sse - (((int64_t)avg * avg) / (bw * bh));
|
||||
return (256 * var) / (bw * bh);
|
||||
} else {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
|
||||
x->plane[0].src.stride,
|
||||
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros),
|
||||
0, &sse);
|
||||
} else {
|
||||
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
|
||||
x->plane[0].src.stride,
|
||||
vp9_64_zeros, 0, &sse);
|
||||
}
|
||||
#else
|
||||
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
|
||||
x->plane[0].src.stride,
|
||||
vp9_64_zeros, 0, &sse);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
return (256 * var) >> num_pels_log2_lookup[bs];
|
||||
}
|
||||
}
|
||||
|
@ -120,16 +120,28 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w) {
|
||||
}
|
||||
|
||||
static void pack_mb_tokens(vp9_writer *w,
|
||||
TOKENEXTRA **tp, const TOKENEXTRA *const stop) {
|
||||
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
|
||||
vpx_bit_depth_t bit_depth) {
|
||||
TOKENEXTRA *p = *tp;
|
||||
|
||||
while (p < stop && p->token != EOSB_TOKEN) {
|
||||
const int t = p->token;
|
||||
const struct vp9_token *const a = &vp9_coef_encodings[t];
|
||||
const vp9_extra_bit *const b = &vp9_extra_bits[t];
|
||||
int i = 0;
|
||||
int v = a->value;
|
||||
int n = a->len;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const vp9_extra_bit *b;
|
||||
if (bit_depth == VPX_BITS_12)
|
||||
b = &vp9_extra_bits_high12[t];
|
||||
else if (bit_depth == VPX_BITS_10)
|
||||
b = &vp9_extra_bits_high10[t];
|
||||
else
|
||||
b = &vp9_extra_bits[t];
|
||||
#else
|
||||
const vp9_extra_bit *const b = &vp9_extra_bits[t];
|
||||
(void) bit_depth;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
/* skip one or two nodes */
|
||||
if (p->skip_eob_node) {
|
||||
@ -387,7 +399,7 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
}
|
||||
|
||||
assert(*tok < tok_end);
|
||||
pack_mb_tokens(w, tok, tok_end);
|
||||
pack_mb_tokens(w, tok, tok_end, cm->bit_depth);
|
||||
}
|
||||
|
||||
static void write_partition(const VP9_COMMON *const cm,
|
||||
|
@ -61,16 +61,51 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
||||
// Eventually this should be replaced by custom no-reference routines,
|
||||
// which will be faster.
|
||||
static const uint8_t VP9_VAR_OFFS[64] = {
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128
|
||||
};
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// Flat 64-entry reference row of value 128 stored as 16-bit samples; used as
// the comparison block for 8-bit content carried in high bit-depth buffers.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};
|
||||
|
||||
// Flat 64-entry reference row for 10-bit content: every entry is 128 * 4
// (i.e. 512), the 8-bit value 128 scaled up by two extra bits.
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512
};
|
||||
|
||||
// Flat 64-entry reference row for 12-bit content: every entry is 128 * 16
// (i.e. 2048), the 8-bit value 128 scaled up by four extra bits.
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
};
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
|
||||
const struct buf_2d *ref,
|
||||
BLOCK_SIZE bs) {
|
||||
@ -80,6 +115,32 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
|
||||
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static unsigned int high_get_sby_perpixel_variance(
|
||||
VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
|
||||
unsigned int var, sse;
|
||||
switch (bd) {
|
||||
case 10:
|
||||
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
||||
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10),
|
||||
0, &sse);
|
||||
break;
|
||||
case 12:
|
||||
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
||||
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12),
|
||||
0, &sse);
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
||||
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8),
|
||||
0, &sse);
|
||||
break;
|
||||
}
|
||||
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
|
||||
const struct buf_2d *ref,
|
||||
int mi_row, int mi_col,
|
||||
@ -419,6 +480,22 @@ static void choose_partitioning(VP9_COMP *cpi,
|
||||
} else {
|
||||
d = VP9_VAR_OFFS;
|
||||
dp = 0;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (xd->bd) {
|
||||
case 10:
|
||||
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
|
||||
break;
|
||||
case 12:
|
||||
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
// Fill in the entire tree of 8x8 variances for splits.
|
||||
@ -734,7 +811,17 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
// Set to zero to make sure we do not use the previous encoded frame stats
|
||||
mbmi->skip = 0;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
x->source_variance =
|
||||
high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd);
|
||||
} else {
|
||||
x->source_variance =
|
||||
get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
|
||||
}
|
||||
#else
|
||||
x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Save rdmult before it might be changed, so it can be restored later.
|
||||
orig_rdmult = x->rdmult;
|
||||
@ -3170,9 +3257,34 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
|
||||
|
||||
for (i = 0; i < cm->mb_rows; i++) {
|
||||
for (j = 0; j < cm->mb_cols; j++) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
switch (cm->bit_depth) {
|
||||
case VPX_BITS_8:
|
||||
vp9_high_get16x16var(src, src_stride, last_src, last_stride,
|
||||
&var16->sse, &var16->sum);
|
||||
break;
|
||||
case VPX_BITS_10:
|
||||
vp9_high_10_get16x16var(src, src_stride, last_src, last_stride,
|
||||
&var16->sse, &var16->sum);
|
||||
break;
|
||||
case VPX_BITS_12:
|
||||
vp9_high_12_get16x16var(src, src_stride, last_src, last_stride,
|
||||
&var16->sse, &var16->sum);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
|
||||
" or VPX_BITS_12");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
vp9_get16x16var(src, src_stride, last_src, last_stride,
|
||||
&var16->sse, &var16->sum);
|
||||
}
|
||||
#else
|
||||
vp9_get16x16var(src, src_stride, last_src, last_stride,
|
||||
&var16->sse, &var16->sum);
|
||||
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
var16->var = var16->sse -
|
||||
(((uint32_t)var16->sum * var16->sum) >> 8);
|
||||
|
||||
@ -3314,7 +3426,15 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
|
||||
cm->tx_mode = select_tx_mode(cpi);
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth)
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
||||
else
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_high_fwht4x4 : vp9_high_fdct4x4;
|
||||
x->high_itxm_add = xd->lossless ? vp9_high_iwht4x4_add : vp9_high_idct4x4_add;
|
||||
#else
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
|
||||
|
||||
if (xd->lossless) {
|
||||
|
@ -51,6 +51,29 @@ void vp9_subtract_block_c(int rows, int cols,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// C reference implementation of the high bit-depth residual computation.
//
// For every position in a rows x cols block, writes src - pred into diff.
// src8 and pred8 are passed as byte pointers and converted to 16-bit sample
// pointers with CONVERT_TO_SHORTPTR; src_stride and pred_stride are then
// applied to those 16-bit pointers, and diff_stride to diff. bd is accepted
// for interface compatibility and is not used.
void vp9_high_subtract_block_c(int rows, int cols,
                               int16_t *diff, ptrdiff_t diff_stride,
                               const uint8_t *src8, ptrdiff_t src_stride,
                               const uint8_t *pred8, ptrdiff_t pred_stride,
                               int bd) {
  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int16_t *diff_row = diff;
  int row;
  (void) bd;

  for (row = 0; row < rows; ++row) {
    int col;
    for (col = 0; col < cols; ++col) {
      diff_row[col] = src_row[col] - pred_row[col];
    }

    // Step all three row pointers to the next line of their buffers.
    diff_row += diff_stride;
    pred_row += pred_stride;
    src_row += src_stride;
  }
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
struct macroblock_plane *const p = &x->plane[plane];
|
||||
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
|
||||
@ -58,6 +81,13 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
|
||||
const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_high_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
|
||||
pd->dst.buf, pd->dst.stride, x->e_mbd.bd);
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
|
||||
pd->dst.buf, pd->dst.stride);
|
||||
}
|
||||
@ -124,6 +154,8 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
int64_t rd_cost0, rd_cost1;
|
||||
int rate0, rate1, error0, error1, t0, t1;
|
||||
int best, band, pt, i, final_eob;
|
||||
const TOKENVALUE *dct_value_tokens;
|
||||
const int16_t *dct_value_cost;
|
||||
|
||||
assert((!type && !plane) || (type && plane));
|
||||
assert(eob <= default_eob);
|
||||
@ -140,9 +172,24 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
tokens[eob][0].qc = 0;
|
||||
tokens[eob][1] = tokens[eob][0];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->bd == 12) {
|
||||
dct_value_tokens = vp9_dct_value_tokens_high12_ptr;
|
||||
dct_value_cost = vp9_dct_value_cost_high12_ptr;
|
||||
} else if (xd->bd == 10) {
|
||||
dct_value_tokens = vp9_dct_value_tokens_high10_ptr;
|
||||
dct_value_cost = vp9_dct_value_cost_high10_ptr;
|
||||
} else {
|
||||
dct_value_tokens = vp9_dct_value_tokens_ptr;
|
||||
dct_value_cost = vp9_dct_value_cost_ptr;
|
||||
}
|
||||
#else
|
||||
dct_value_tokens = vp9_dct_value_tokens_ptr;
|
||||
dct_value_cost = vp9_dct_value_cost_ptr;
|
||||
#endif
|
||||
for (i = 0; i < eob; i++)
|
||||
token_cache[scan[i]] =
|
||||
vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token];
|
||||
vp9_pt_energy_class[dct_value_tokens[qcoeff[scan[i]]].token];
|
||||
|
||||
for (i = eob; i-- > 0;) {
|
||||
int base_bits, d2, dx;
|
||||
@ -156,7 +203,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
/* Evaluate the first possibility for this state. */
|
||||
rate0 = tokens[next][0].rate;
|
||||
rate1 = tokens[next][1].rate;
|
||||
t0 = (vp9_dct_value_tokens_ptr + x)->token;
|
||||
t0 = (dct_value_tokens + x)->token;
|
||||
/* Consider both possible successor states. */
|
||||
if (next < default_eob) {
|
||||
band = band_translate[i + 1];
|
||||
@ -169,8 +216,13 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
UPDATE_RD_COST();
|
||||
/* And pick the best. */
|
||||
best = rd_cost1 < rd_cost0;
|
||||
base_bits = vp9_dct_value_cost_ptr[x];
|
||||
base_bits = dct_value_cost[x];
|
||||
dx = mul * (dqcoeff[rc] - coeff[rc]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
dx >>= xd->bd - 8;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
d2 = dx * dx;
|
||||
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
|
||||
tokens[i][0].error = d2 + (best ? error1 : error0);
|
||||
@ -203,7 +255,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
|
||||
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
|
||||
} else {
|
||||
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
|
||||
t0 = t1 = (dct_value_tokens + x)->token;
|
||||
}
|
||||
if (next < default_eob) {
|
||||
band = band_translate[i + 1];
|
||||
@ -222,10 +274,19 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
UPDATE_RD_COST();
|
||||
/* And pick the best. */
|
||||
best = rd_cost1 < rd_cost0;
|
||||
base_bits = vp9_dct_value_cost_ptr[x];
|
||||
base_bits = dct_value_cost[x];
|
||||
|
||||
if (shortcut) {
|
||||
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
|
||||
} else {
|
||||
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
|
||||
}
|
||||
#else
|
||||
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
d2 = dx * dx;
|
||||
}
|
||||
tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
|
||||
@ -310,7 +371,7 @@ static INLINE void high_fdct32x32(int rd_transform, const int16_t *src,
|
||||
else
|
||||
vp9_high_fdct32x32(src, dst, src_stride);
|
||||
}
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
|
||||
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
|
||||
@ -328,6 +389,44 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
|
||||
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
|
||||
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
p->round_fp, p->quant_fp, p->quant_shift,
|
||||
qcoeff, dqcoeff, pd->dequant, p->zbin_extra,
|
||||
eob, scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_high_fdct16x16(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
|
||||
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_8X8:
|
||||
vp9_high_fdct8x8(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
|
||||
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_4X4:
|
||||
x->fwd_txm4x4(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
|
||||
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
@ -379,6 +478,40 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
|
||||
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
|
||||
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
vp9_high_fdct32x32_1(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_dc_32x32(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_high_fdct16x16_1(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
case TX_8X8:
|
||||
vp9_high_fdct8x8_1(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
case TX_4X4:
|
||||
x->fwd_txm4x4(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_dc(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
vp9_fdct32x32_1(src_diff, coeff, diff_stride);
|
||||
@ -426,6 +559,44 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
|
||||
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
|
||||
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
p->round, p->quant, p->quant_shift, qcoeff,
|
||||
dqcoeff, pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_high_fdct16x16(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_8X8:
|
||||
vp9_high_fdct8x8(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
case TX_4X4:
|
||||
x->fwd_txm4x4(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
@ -520,6 +691,34 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
|
||||
if (x->skip_encode || p->eobs[block] == 0)
|
||||
return;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
vp9_high_idct32x32_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_high_idct16x16_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
break;
|
||||
case TX_8X8:
|
||||
vp9_high_idct8x8_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
break;
|
||||
case TX_4X4:
|
||||
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
||||
// which is significant (not just an optimization) for the lossless
|
||||
// case.
|
||||
x->high_itxm_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
@ -557,8 +756,15 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
|
||||
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
|
||||
|
||||
if (p->eobs[block] > 0)
|
||||
if (p->eobs[block] > 0) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
x->high_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
|
||||
@ -622,6 +828,115 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
src = &p->src.buf[4 * (j * src_stride + i)];
|
||||
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
scan_order = &vp9_default_scan_orders[TX_32X32];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_high_subtract_block(32, 32, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
p->round, p->quant, p->quant_shift, qcoeff,
|
||||
dqcoeff, pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
}
|
||||
if (!x->skip_encode && *eob) {
|
||||
vp9_high_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
|
||||
}
|
||||
break;
|
||||
case TX_16X16:
|
||||
tx_type = get_tx_type(pd->plane_type, xd);
|
||||
scan_order = &vp9_scan_orders[TX_16X16][tx_type];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_high_subtract_block(16, 16, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
vp9_high_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
}
|
||||
if (!x->skip_encode && *eob) {
|
||||
vp9_high_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
|
||||
*eob, xd->bd);
|
||||
}
|
||||
break;
|
||||
case TX_8X8:
|
||||
tx_type = get_tx_type(pd->plane_type, xd);
|
||||
scan_order = &vp9_scan_orders[TX_8X8][tx_type];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_high_subtract_block(8, 8, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
vp9_high_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
}
|
||||
if (!x->skip_encode && *eob) {
|
||||
vp9_high_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
|
||||
xd->bd);
|
||||
}
|
||||
break;
|
||||
case TX_4X4:
|
||||
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
|
||||
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
|
||||
mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
|
||||
if (!x->skip_recode) {
|
||||
vp9_high_subtract_block(4, 4, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_high_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
||||
else
|
||||
x->fwd_txm4x4(src_diff, coeff, diff_stride);
|
||||
vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
}
|
||||
|
||||
if (!x->skip_encode && *eob) {
|
||||
if (tx_type == DCT_DCT)
|
||||
// this is like vp9_short_idct4x4 but has a special case around
|
||||
// eob<=1 which is significant (not just an optimization) for the
|
||||
// lossless case.
|
||||
x->high_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
|
||||
else
|
||||
vp9_high_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
if (*eob)
|
||||
*(args->skip) = 0;
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
scan_order = &vp9_default_scan_orders[TX_32X32];
|
||||
|
@ -55,6 +55,52 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High bit-depth counterpart of copy_and_extend_plane().
//
// src8 and dst8 are passed through CONVERT_TO_SHORTPTR() to obtain the
// uint16_t sample pointers that are actually read and written. Because all
// pointer arithmetic below is done on uint16_t pointers, src_pitch, dst_pitch,
// w and the extend_* amounts are counted in 16-bit samples, not bytes.
//
// The w x h source plane is copied into dst and a border is synthesised
// around it: extend_left / extend_right samples per row replicate the first /
// last sample of that row, and extend_top / extend_bottom rows replicate the
// first / last (already widened) destination row.
static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
                                         uint8_t *dst8, int dst_pitch,
                                         int w, int h,
                                         int extend_top, int extend_left,
                                         int extend_bottom, int extend_right) {
  uint16_t *const src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8);
  // Width of one destination row including both side borders, in samples.
  const int padded_width = extend_left + extend_right + w;
  const uint16_t *src_row = src;
  // Start of the current destination row, left border included.
  uint16_t *dst_row = dst - extend_left;
  const uint16_t *first_row;
  const uint16_t *last_row;
  uint16_t *border;
  int row;

  // Pass 1: copy every source row and pad it on the left and on the right
  // with its own edge samples.
  for (row = 0; row < h; ++row) {
    vpx_memset16(dst_row, src_row[0], extend_left);
    vpx_memcpy(dst_row + extend_left, src_row, w * sizeof(uint16_t));
    vpx_memset16(dst_row + extend_left + w, src_row[w - 1], extend_right);
    src_row += src_pitch;
    dst_row += dst_pitch;
  }

  // Pass 2: replicate the first and last padded destination rows into the
  // top and bottom borders. The sources are rows written by pass 1, so the
  // corners are filled as well.
  first_row = dst - extend_left;
  last_row = dst + dst_pitch * (h - 1) - extend_left;

  border = dst + dst_pitch * (-extend_top) - extend_left;
  for (row = 0; row < extend_top; ++row) {
    vpx_memcpy(border, first_row, padded_width * sizeof(uint16_t));
    border += dst_pitch;
  }

  border = dst + dst_pitch * (h) - extend_left;
  for (row = 0; row < extend_bottom; ++row) {
    vpx_memcpy(border, last_row, padded_width * sizeof(uint16_t));
    border += dst_pitch;
  }
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst) {
|
||||
// Extend src frame in buffer
|
||||
@ -75,6 +121,26 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
||||
const int eb_uv = eb_y >> uv_height_subsampling;
|
||||
const int er_uv = er_y >> uv_width_subsampling;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_width, src->y_height,
|
||||
et_y, el_y, eb_y, er_y);
|
||||
|
||||
highbd_copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_width, src->uv_height,
|
||||
et_uv, el_uv, eb_uv, er_uv);
|
||||
|
||||
highbd_copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_width, src->uv_height,
|
||||
et_uv, el_uv, eb_uv, er_uv);
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_width, src->y_height,
|
||||
|
@ -281,6 +281,60 @@ static unsigned int get_prediction_error(BLOCK_SIZE bsize,
|
||||
return sse;
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// Returns the high bit-depth MSE function matching the given block size and
// bit depth.
//
// bd == 10 and bd == 12 select the vp9_high_10_* and vp9_high_12_* families;
// every other bd value selects the plain vp9_high_* family. Within a family,
// BLOCK_8X8, BLOCK_16X8 and BLOCK_8X16 map to their own function and any
// other block size maps to the 16x16 function.
static vp9_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
                                                      int bd) {
  if (bd == 10) {
    switch (bsize) {
      case BLOCK_8X8:  return vp9_high_10_mse8x8;
      case BLOCK_16X8: return vp9_high_10_mse16x8;
      case BLOCK_8X16: return vp9_high_10_mse8x16;
      default:         return vp9_high_10_mse16x16;
    }
  }

  if (bd == 12) {
    switch (bsize) {
      case BLOCK_8X8:  return vp9_high_12_mse8x8;
      case BLOCK_16X8: return vp9_high_12_mse16x8;
      case BLOCK_8X16: return vp9_high_12_mse8x16;
      default:         return vp9_high_12_mse16x16;
    }
  }

  // Any other bit depth uses the plain high bit-depth functions.
  switch (bsize) {
    case BLOCK_8X8:  return vp9_high_mse8x8;
    case BLOCK_16X8: return vp9_high_mse16x8;
    case BLOCK_8X16: return vp9_high_mse8x16;
    default:         return vp9_high_mse16x16;
  }
}
|
||||
|
||||
// Computes the prediction error between the src and ref blocks for a high
// bit-depth frame.
//
// The function chosen by highbd_get_block_variance_fn(bsize, bd) is called on
// the two buffers; its own return value is ignored and the value it stores
// through its last (sse) argument is returned.
static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
                                                const struct buf_2d *src,
                                                const struct buf_2d *ref,
                                                int bd) {
  unsigned int block_sse;
  highbd_get_block_variance_fn(bsize, bd)(src->buf, src->stride,
                                          ref->buf, ref->stride, &block_sse);
  return block_sse;
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Refine the motion search range according to the frame dimension
|
||||
// for first pass test.
|
||||
static int get_search_range(const VP9_COMMON *cm) {
|
||||
@ -311,6 +365,11 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
// Override the default variance function to use MSE.
|
||||
v_fn_ptr.vf = get_block_variance_fn(bsize);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Center the initial step/diamond search on best mv.
|
||||
tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
|
||||
@ -562,6 +621,24 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
|
||||
vp9_encode_intra_block_plane(x, bsize, 0);
|
||||
this_error = vp9_get_mb_ss(x->plane[0].src_diff);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
switch (cm->bit_depth) {
|
||||
case VPX_BITS_8:
|
||||
break;
|
||||
case VPX_BITS_10:
|
||||
this_error >>= 4;
|
||||
break;
|
||||
case VPX_BITS_12:
|
||||
this_error >>= 8;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "cm->bit_depth should be VPX_BITS_8, "
|
||||
"VPX_BITS_10 or VPX_BITS_12");
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
|
||||
vp9_clear_system_state();
|
||||
@ -601,8 +678,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
struct buf_2d unscaled_last_source_buf_2d;
|
||||
|
||||
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
|
||||
motion_error = get_prediction_error(bsize, &x->plane[0].src,
|
||||
&xd->plane[0].pre[0]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
|
||||
} else {
|
||||
motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
|
||||
}
|
||||
#else
|
||||
motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Compute the motion error of the 0,0 motion using the last source
|
||||
// frame as the reference. Skip the further motion search on
|
||||
@ -611,8 +698,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
cpi->unscaled_last_source->y_buffer + recon_yoffset;
|
||||
unscaled_last_source_buf_2d.stride =
|
||||
cpi->unscaled_last_source->y_stride;
|
||||
raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
|
||||
&unscaled_last_source_buf_2d);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
raw_motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
|
||||
} else {
|
||||
raw_motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d);
|
||||
}
|
||||
#else
|
||||
raw_motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &unscaled_last_source_buf_2d);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// TODO(pengchong): Replace the hard-coded threshold
|
||||
if (raw_motion_error > 25 || lc != NULL) {
|
||||
@ -648,8 +745,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
int gf_motion_error;
|
||||
|
||||
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
|
||||
gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
|
||||
&xd->plane[0].pre[0]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
gf_motion_error = highbd_get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
|
||||
} else {
|
||||
gf_motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
|
||||
}
|
||||
#else
|
||||
gf_motion_error = get_prediction_error(
|
||||
bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
|
||||
&gf_motion_error);
|
||||
|
@ -284,16 +284,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
|
||||
int tc = bc; \
|
||||
\
|
||||
bestmv->row *= 8; \
|
||||
bestmv->col *= 8; \
|
||||
if (second_pred != NULL) { \
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
|
||||
} else { \
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
|
||||
} \
|
||||
*distortion = besterr; \
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
bestmv->col *= 8;
|
||||
|
||||
int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
@ -309,6 +300,29 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
if (second_pred != NULL) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
|
||||
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
|
||||
y_stride);
|
||||
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
|
||||
sse1);
|
||||
} else {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
}
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
|
||||
}
|
||||
*distortion = besterr;
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
|
||||
if (sad_list &&
|
||||
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
|
||||
@ -401,6 +415,29 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
if (second_pred != NULL) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
|
||||
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
|
||||
y_stride);
|
||||
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
|
||||
sse1);
|
||||
} else {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
}
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
|
||||
}
|
||||
*distortion = besterr;
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
(void) sad_list; // to silence compiler warning
|
||||
|
||||
// Each subsequent iteration checks at least one point in
|
||||
|
@ -40,7 +40,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
|
||||
|
||||
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1,
|
||||
partial_frame);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
|
||||
} else {
|
||||
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
|
||||
}
|
||||
#else
|
||||
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
@ -145,7 +153,26 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
|
||||
const int q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
|
||||
// These values were determined by linear fitting the result of the
|
||||
// searched level, filt_guess = q * 0.316206 + 3.87252
|
||||
#if CONFIG_VP9_HIGHDEPTH
|
||||
int filt_guess;
|
||||
switch (cm->bit_depth) {
|
||||
case VPX_BITS_8:
|
||||
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
|
||||
break;
|
||||
case VPX_BITS_10:
|
||||
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
|
||||
break;
|
||||
case VPX_BITS_12:
|
||||
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
|
||||
"or VPX_BITS_12");
|
||||
return;
|
||||
}
|
||||
#else
|
||||
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
filt_guess -= 4;
|
||||
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
|
||||
|
@ -241,13 +241,44 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
||||
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
|
||||
dc_quant >> (xd->bd - 5), &rate, &dist);
|
||||
} else {
|
||||
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
|
||||
dc_quant >> 3, &rate, &dist);
|
||||
}
|
||||
#else
|
||||
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
|
||||
dc_quant >> 3, &rate, &dist);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
*out_rate_sum = rate >> 1;
|
||||
*out_dist_sum = dist << 3;
|
||||
|
||||
vp9_model_rd_from_var_lapndz(var, 1 << num_pels_log2_lookup[bsize],
|
||||
ac_quant >> 3, &rate, &dist);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_model_rd_from_var_lapndz(var,
|
||||
1 << num_pels_log2_lookup[bsize],
|
||||
ac_quant >> (xd->bd - 5),
|
||||
&rate,
|
||||
&dist);
|
||||
} else {
|
||||
vp9_model_rd_from_var_lapndz(var,
|
||||
1 << num_pels_log2_lookup[bsize],
|
||||
ac_quant >> 3,
|
||||
&rate,
|
||||
&dist);
|
||||
}
|
||||
#else
|
||||
vp9_model_rd_from_var_lapndz(var,
|
||||
1 << num_pels_log2_lookup[bsize],
|
||||
ac_quant >> 3,
|
||||
&rate,
|
||||
&dist);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
*out_rate_sum += rate;
|
||||
*out_dist_sum += dist << 4;
|
||||
}
|
||||
@ -293,9 +324,17 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// The encode_breakout input
|
||||
const unsigned int min_thresh =
|
||||
MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const int shift = 2 * xd->bd - 16;
|
||||
#endif
|
||||
|
||||
// Calculate threshold according to dequant value.
|
||||
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
|
||||
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
|
||||
|
||||
// Adjust ac threshold according to partition size.
|
||||
@ -303,6 +342,11 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
8 - (b_width_log2(bsize) + b_height_log2(bsize));
|
||||
|
||||
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
|
||||
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
thresh_ac = 0;
|
||||
thresh_dc = 0;
|
||||
@ -438,9 +482,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// var_y and sse_y are saved to be used in skipping checking
|
||||
unsigned int var_y = UINT_MAX;
|
||||
unsigned int sse_y = UINT_MAX;
|
||||
|
||||
const int intra_cost_penalty =
|
||||
20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
|
||||
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
|
||||
intra_cost_penalty, 0);
|
||||
const int intra_mode_cost = 50;
|
||||
@ -461,14 +504,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
|
||||
PRED_BUFFER tmp[4];
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, pred_buf_16, 3 * 64 * 64);
|
||||
#endif
|
||||
struct buf_2d orig_dst = pd->dst;
|
||||
PRED_BUFFER *best_pred = NULL;
|
||||
PRED_BUFFER *this_mode_pred = NULL;
|
||||
const int pixels_in_block = bh * bw;
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
int i;
|
||||
for (i = 0; i < 3; i++) {
|
||||
tmp[i].data = &pred_buf[bw * bh * i];
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth)
|
||||
tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
|
||||
else
|
||||
tmp[i].data = &pred_buf[pixels_in_block * i];
|
||||
#else
|
||||
tmp[i].data = &pred_buf[pixels_in_block * i];
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
tmp[i].stride = bw;
|
||||
tmp[i].in_use = 0;
|
||||
}
|
||||
@ -703,8 +757,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
|
||||
best_pred->data != orig_dst.buf) {
|
||||
pd->dst = orig_dst;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
vp9_high_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride,
|
||||
NULL, 0, NULL, 0, bw, bh, xd->bd);
|
||||
} else {
|
||||
vp9_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride,
|
||||
NULL, 0, NULL, 0, bw, bh);
|
||||
}
|
||||
#else
|
||||
vp9_convolve_copy(best_pred->data, bw, pd->dst.buf, pd->dst.stride, NULL, 0,
|
||||
NULL, 0, bw, bh);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
mbmi->mode = best_mode;
|
||||
|
@ -155,7 +155,7 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
|
||||
}
|
||||
#else
|
||||
int rdmult = 88 * q * q / 24;
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
|
||||
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
|
||||
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
|
||||
@ -187,7 +187,7 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
|
||||
#else
|
||||
(void) bit_depth;
|
||||
q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
// TODO(debargha): Adjust the function below.
|
||||
return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
|
||||
}
|
||||
@ -213,7 +213,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
|
||||
#else
|
||||
cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
|
||||
cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
|
||||
@ -598,3 +598,24 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
|
||||
if (sf->disable_split_mask & (1 << i))
|
||||
rd->thresh_mult_sub8x8[i] = INT_MAX;
|
||||
}
|
||||
|
||||
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
|
||||
vpx_bit_depth_t bit_depth) {
|
||||
const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
switch (bit_depth) {
|
||||
case VPX_BITS_8:
|
||||
return 20 * q;
|
||||
case VPX_BITS_10:
|
||||
return 5 * q;
|
||||
case VPX_BITS_12:
|
||||
return ROUND_POWER_OF_TWO(5 * q, 2);
|
||||
default:
|
||||
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
return 20 * q;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
|
@ -162,6 +162,10 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col,
|
||||
const struct scale_factors *scale,
|
||||
const struct scale_factors *scale_uv);
|
||||
|
||||
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
|
||||
vpx_bit_depth_t bit_depth);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -228,9 +228,13 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
||||
// Fast approximate the modelling function.
|
||||
if (cpi->oxcf.speed > 4) {
|
||||
int64_t rate;
|
||||
int64_t dist;
|
||||
int64_t square_error = sse;
|
||||
int quantizer = (pd->dequant[1] >> 3);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
quantizer >>= (xd->bd - 8);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (quantizer < 120)
|
||||
rate = (square_error * (280 - quantizer)) >> 8;
|
||||
@ -240,8 +244,19 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
||||
rate_sum += rate;
|
||||
dist_sum += dist;
|
||||
} else {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
|
||||
pd->dequant[1] >> (xd->bd - 5),
|
||||
&rate, &dist);
|
||||
} else {
|
||||
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
|
||||
pd->dequant[1] >> 3, &rate, &dist);
|
||||
}
|
||||
#else
|
||||
vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
|
||||
pd->dequant[1] >> 3, &rate, &dist);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
rate_sum += rate;
|
||||
dist_sum += dist;
|
||||
}
|
||||
@ -266,6 +281,31 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High bit-depth block error.
//
// Accumulates, over the first block_size entries, the squared difference
// between coeff and dqcoeff and the squared magnitude of coeff. Both sums are
// then rounded and shifted right by 2 * (bd - 8) bits; no rounding term is
// added when that shift is not positive.
//
//   coeff, dqcoeff  input coefficient arrays, at least block_size entries
//   block_size      number of coefficients to process
//   ssz             out: scaled sum of squared coeff values
//   bd              bit depth used to derive the shift
//
// Returns the scaled sum of squared (coeff - dqcoeff) differences.
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
                               const tran_low_t *dqcoeff,
                               intptr_t block_size,
                               int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;
  int idx = 0;

  while (idx < block_size) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    sum_sq_err += delta * delta;
    sum_sq_coeff += (int64_t)coeff[idx] * (int64_t)coeff[idx];
    ++idx;
  }
  assert(sum_sq_err >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_err + rounding) >> shift;
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
|
||||
* decide whether to include cost of a trailing EOB node or not (i.e. we
|
||||
* can skip this if the last coefficient in this transform block, e.g. the
|
||||
@ -351,8 +391,14 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static void dist_block(int plane, int block, TX_SIZE tx_size,
|
||||
struct rdcost_block_args* args, int bd) {
|
||||
#else
|
||||
static void dist_block(int plane, int block, TX_SIZE tx_size,
|
||||
struct rdcost_block_args* args) {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
const int ss_txfrm_size = tx_size << 1;
|
||||
MACROBLOCK* const x = args->x;
|
||||
MACROBLOCKD* const xd = &x->e_mbd;
|
||||
@ -362,14 +408,24 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
|
||||
int shift = tx_size == TX_32X32 ? 0 : 2;
|
||||
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
||||
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
||||
&this_sse, bd) >> shift;
|
||||
#else
|
||||
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
||||
&this_sse) >> shift;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
args->sse = this_sse >> shift;
|
||||
|
||||
if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
|
||||
// TODO(jingning): tune the model to better capture the distortion.
|
||||
int64_t p = (pd->dequant[1] * pd->dequant[1] *
|
||||
(1 << ss_txfrm_size)) >> (shift + 2);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
p >>= ((xd->bd - 8) * 2);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
args->dist += (p >> 4);
|
||||
args->sse += p;
|
||||
}
|
||||
@ -399,12 +455,28 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
|
||||
if (!is_inter_block(mbmi)) {
|
||||
vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
dist_block(plane, block, tx_size, args, xd->bd);
|
||||
} else {
|
||||
dist_block(plane, block, tx_size, args, 8);
|
||||
}
|
||||
#else
|
||||
dist_block(plane, block, tx_size, args);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
|
||||
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
|
||||
// full forward transform and quantization
|
||||
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
dist_block(plane, block, tx_size, args, xd->bd);
|
||||
} else {
|
||||
dist_block(plane, block, tx_size, args, 8);
|
||||
}
|
||||
#else
|
||||
dist_block(plane, block, tx_size, args);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
|
||||
// compute DC coefficient
|
||||
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
|
||||
@ -424,7 +496,15 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
} else {
|
||||
// full forward transform and quantization
|
||||
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
dist_block(plane, block, tx_size, args, xd->bd);
|
||||
} else {
|
||||
dist_block(plane, block, tx_size, args, 8);
|
||||
}
|
||||
#else
|
||||
dist_block(plane, block, tx_size, args);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
rate_block(plane, block, plane_bsize, tx_size, args);
|
||||
@ -659,6 +739,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
||||
int idx, idy;
|
||||
uint8_t best_dst[8 * 8];
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
uint16_t best_dst16[8 * 8];
|
||||
#endif
|
||||
|
||||
assert(ib < 4);
|
||||
|
||||
@ -666,6 +749,108 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
vpx_memcpy(tl, l, sizeof(tl));
|
||||
xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
|
||||
int64_t this_rd;
|
||||
int ratey = 0;
|
||||
int64_t distortion = 0;
|
||||
int rate = bmode_costs[mode];
|
||||
|
||||
if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
|
||||
continue;
|
||||
|
||||
// Only do the oblique modes if the best so far is
|
||||
// one of the neighboring directional modes
|
||||
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
|
||||
if (conditional_skipintra(mode, *best_mode))
|
||||
continue;
|
||||
}
|
||||
|
||||
vpx_memcpy(tempa, ta, sizeof(ta));
|
||||
vpx_memcpy(templ, tl, sizeof(tl));
|
||||
|
||||
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
|
||||
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
|
||||
const int block = ib + idy * 2 + idx;
|
||||
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
|
||||
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
|
||||
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
|
||||
p->src_diff);
|
||||
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
|
||||
xd->mi[0].src_mi->bmi[block].as_mode = mode;
|
||||
vp9_predict_intra_block(xd, block, 1,
|
||||
TX_4X4, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, idx, idy, 0);
|
||||
vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
|
||||
dst, dst_stride, xd->bd);
|
||||
if (xd->lossless) {
|
||||
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
|
||||
vp9_high_fwht4x4(src_diff, coeff, 8);
|
||||
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
|
||||
so->scan, so->neighbors,
|
||||
cpi->sf.use_fast_coef_costing);
|
||||
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
||||
goto next_highbd;
|
||||
vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
dst, dst_stride,
|
||||
p->eobs[block], xd->bd);
|
||||
} else {
|
||||
int64_t unused;
|
||||
const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
|
||||
const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
|
||||
vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
|
||||
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
|
||||
so->scan, so->neighbors,
|
||||
cpi->sf.use_fast_coef_costing);
|
||||
distortion += vp9_high_block_error(coeff,
|
||||
BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
16, &unused, xd->bd) >> 2;
|
||||
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
||||
goto next_highbd;
|
||||
vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
dst, dst_stride, p->eobs[block], xd->bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rate += ratey;
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
||||
|
||||
if (this_rd < best_rd) {
|
||||
*bestrate = rate;
|
||||
*bestratey = ratey;
|
||||
*bestdistortion = distortion;
|
||||
best_rd = this_rd;
|
||||
*best_mode = mode;
|
||||
vpx_memcpy(a, tempa, sizeof(tempa));
|
||||
vpx_memcpy(l, templ, sizeof(templ));
|
||||
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
|
||||
vpx_memcpy(best_dst16 + idy * 8,
|
||||
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
|
||||
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
|
||||
}
|
||||
}
|
||||
next_highbd:
|
||||
{}
|
||||
}
|
||||
if (best_rd >= rd_thresh || x->skip_encode)
|
||||
return best_rd;
|
||||
|
||||
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
|
||||
vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
|
||||
best_dst16 + idy * 8,
|
||||
num_4x4_blocks_wide * 4 * sizeof(uint16_t));
|
||||
}
|
||||
|
||||
return best_rd;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
|
||||
int64_t this_rd;
|
||||
int ratey = 0;
|
||||
@ -1118,6 +1303,16 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
for (ref = 0; ref < 1 + is_compound; ++ref) {
|
||||
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
|
||||
pd->pre[ref].stride)];
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_high_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
&xd->block_refs[ref]->sf, width, height, ref,
|
||||
kernel, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE + 4 * (i % 2),
|
||||
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
|
||||
} else {
|
||||
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
@ -1126,11 +1321,32 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
mi_col * MI_SIZE + 4 * (i % 2),
|
||||
mi_row * MI_SIZE + 4 * (i / 2));
|
||||
}
|
||||
#else
|
||||
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
&xd->block_refs[ref]->sf, width, height, ref,
|
||||
kernel, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE + 4 * (i % 2),
|
||||
mi_row * MI_SIZE + 4 * (i / 2));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_high_subtract_block(
|
||||
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
||||
src, p->src.stride, dst, pd->dst.stride, xd->bd);
|
||||
} else {
|
||||
vp9_subtract_block(
|
||||
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
}
|
||||
#else
|
||||
vp9_subtract_block(height, width,
|
||||
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
||||
src, p->src.stride,
|
||||
dst, pd->dst.stride);
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
k = i;
|
||||
for (idy = 0; idy < height / 4; ++idy) {
|
||||
@ -1143,8 +1359,19 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
|
||||
coeff, 8);
|
||||
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
thisdistortion += vp9_high_block_error(coeff,
|
||||
BLOCK_OFFSET(pd->dqcoeff, k),
|
||||
16, &ssz, xd->bd);
|
||||
} else {
|
||||
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
||||
16, &ssz);
|
||||
}
|
||||
#else
|
||||
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
||||
16, &ssz);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
thissse += ssz;
|
||||
thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
|
||||
so->scan, so->neighbors,
|
||||
@ -1901,7 +2128,12 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int_mv ref_mv[2];
|
||||
int ite, ref;
|
||||
// Prediction buffer from second frame.
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
uint8_t *second_pred;
|
||||
uint8_t *second_pred_alloc;
|
||||
#else
|
||||
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
|
||||
|
||||
// Do joint motion search in compound mode to get more accurate mv.
|
||||
@ -1912,6 +2144,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
|
||||
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
|
||||
};
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));
|
||||
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);
|
||||
} else {
|
||||
second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));
|
||||
second_pred = second_pred_alloc;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
for (ref = 0; ref < 2; ++ref) {
|
||||
ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
|
||||
@ -1950,6 +2191,28 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
ref_yv12[1] = xd->plane[0].pre[1];
|
||||
|
||||
// Get pred block from second frame.
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_high_build_inter_predictor(ref_yv12[!id].buf,
|
||||
ref_yv12[!id].stride,
|
||||
second_pred, pw,
|
||||
&frame_mv[refs[!id]].as_mv,
|
||||
&xd->block_refs[!id]->sf,
|
||||
pw, ph, 0,
|
||||
kernel, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE, mi_row * MI_SIZE,
|
||||
xd->bd);
|
||||
} else {
|
||||
vp9_build_inter_predictor(ref_yv12[!id].buf,
|
||||
ref_yv12[!id].stride,
|
||||
second_pred, pw,
|
||||
&frame_mv[refs[!id]].as_mv,
|
||||
&xd->block_refs[!id]->sf,
|
||||
pw, ph, 0,
|
||||
kernel, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE, mi_row * MI_SIZE);
|
||||
}
|
||||
#else
|
||||
vp9_build_inter_predictor(ref_yv12[!id].buf,
|
||||
ref_yv12[!id].stride,
|
||||
second_pred, pw,
|
||||
@ -1958,6 +2221,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
pw, ph, 0,
|
||||
kernel, MV_PRECISION_Q3,
|
||||
mi_col * MI_SIZE, mi_row * MI_SIZE);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Compound motion search on first ref frame.
|
||||
if (id)
|
||||
@ -2026,7 +2290,11 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
vpx_free(second_pred_alloc);
|
||||
#else
|
||||
vpx_free(second_pred);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
static INLINE void restore_dst_buf(MACROBLOCKD *xd,
|
||||
@ -2068,12 +2336,26 @@ static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
// Calculate threshold according to dequant value.
|
||||
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
const int shift = 2 * xd->bd - 16;
|
||||
if (shift > 0)
|
||||
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
|
||||
|
||||
// Adjust threshold according to partition size.
|
||||
thresh_ac >>= 8 - (b_width_log2(bsize) +
|
||||
b_height_log2(bsize));
|
||||
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
const int shift = 2 * xd->bd - 16;
|
||||
if (shift > 0)
|
||||
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
thresh_ac = 0;
|
||||
thresh_dc = 0;
|
||||
@ -2145,7 +2427,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int refs[2] = { mbmi->ref_frame[0],
|
||||
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
|
||||
int_mv cur_mv[2];
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
|
||||
uint8_t *tmp_buf = tmp_buf8;
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
int pred_exists = 0;
|
||||
int intpel_mv;
|
||||
int64_t rd, tmp_rd, best_rd = INT64_MAX;
|
||||
@ -2162,6 +2450,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
(((mi_row + mi_col) >> bsl) +
|
||||
get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
|
||||
} else {
|
||||
tmp_buf = tmp_buf8;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (pred_filter_search) {
|
||||
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
|
||||
if (xd->up_available)
|
||||
@ -2575,8 +2871,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int64_t dist_uv[TX_SIZES];
|
||||
int skip_uv[TX_SIZES];
|
||||
PREDICTION_MODE mode_uv[TX_SIZES];
|
||||
const int intra_cost_penalty =
|
||||
20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
|
||||
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
int best_skip2 = 0;
|
||||
uint8_t ref_frame_skip_mask[2] = { 0 };
|
||||
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
|
||||
@ -3011,9 +3307,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// based on qp, activity mask and history
|
||||
if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
|
||||
(mode_index > MIN_EARLY_TERM_INDEX)) {
|
||||
const int qstep = xd->plane[0].dequant[1];
|
||||
int qstep = xd->plane[0].dequant[1];
|
||||
// TODO(debargha): Enhance this by specializing for each mode_index
|
||||
int scale = 4;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
qstep >>= (xd->bd - 8);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
if (x->source_variance < UINT_MAX) {
|
||||
const int var_adjust = (x->source_variance < 16);
|
||||
scale -= var_adjust;
|
||||
@ -3329,8 +3630,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int64_t dist_uv;
|
||||
int skip_uv;
|
||||
PREDICTION_MODE mode_uv = DC_PRED;
|
||||
const int intra_cost_penalty =
|
||||
20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
|
||||
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
||||
int_mv seg_mvs[4][MAX_REF_FRAMES];
|
||||
b_mode_info best_bmodes[4];
|
||||
int best_skip2 = 0;
|
||||
@ -3748,9 +4049,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// based on qp, activity mask and history
|
||||
if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
|
||||
(ref_index > MIN_EARLY_TERM_INDEX)) {
|
||||
const int qstep = xd->plane[0].dequant[1];
|
||||
int qstep = xd->plane[0].dequant[1];
|
||||
// TODO(debargha): Enhance this by specializing for each mode_index
|
||||
int scale = 4;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
qstep >>= (xd->bd - 8);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
if (x->source_variance < UINT_MAX) {
|
||||
const int var_adjust = (x->source_variance < 16);
|
||||
scale -= var_adjust;
|
||||
|
@ -54,7 +54,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
|
||||
BLOCK_SIZE bsize,
|
||||
PICK_MODE_CONTEXT *ctx,
|
||||
int64_t best_rd_so_far);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -53,6 +53,12 @@ extern const int16_t *vp9_dct_value_cost_ptr;
|
||||
* fields are not.
|
||||
*/
|
||||
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
extern const int16_t *vp9_dct_value_cost_high10_ptr;
|
||||
extern const TOKENVALUE *vp9_dct_value_tokens_high10_ptr;
|
||||
extern const int16_t *vp9_dct_value_cost_high12_ptr;
|
||||
extern const TOKENVALUE *vp9_dct_value_tokens_high12_ptr;
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
Loading…
x
Reference in New Issue
Block a user