Make interintra experiment work with highbitdepth

Also includes some adjustments to the algorithm.
All stats look good.

Change-Id: I824ef8ecf25b34f3feb358623d14fe375c3e4eb7
This commit is contained in:
Deb Mukherjee 2015-03-17 21:32:21 -07:00
parent c8ed36432e
commit c082df2359
5 changed files with 483 additions and 76 deletions

View File

@ -739,7 +739,6 @@ static void build_intra_predictors_highbd(const MACROBLOCKD *xd,
int frame_width, frame_height;
int x0, y0;
const struct macroblockd_plane *const pd = &xd->plane[plane];
// int base=128;
int base = 128 << (bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
@ -1336,7 +1335,6 @@ static void build_filter_intra_predictors(const MACROBLOCKD *xd,
}
#endif // CONFIG_FILTERINTRA
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
#if CONFIG_FILTERINTRA
@ -1861,6 +1859,155 @@ static void combine_interintra(PREDICTION_MODE mode,
}
}
#if CONFIG_VP9_HIGHBITDEPTH
// Per-pixel weight (on a 0..256 scale) given to the intra predictor for the
// directional interintra modes; returns -1 for TM_PRED/DC_PRED (and any other
// mode), which selects a plain (inter + intra) / 2 average instead.
// size_scale stretches the 64-entry table over blocks smaller than 64.
static int interintra_hbd_scale(PREDICTION_MODE mode, int i, int j,
                                int size_scale) {
  // Weight of the intra predictor as a function of distance from the
  // intra prediction edge (decays from 128 toward 67).
  static const int weights1d[64] = {
    128, 125, 122, 119, 116, 114, 111, 109,
    107, 105, 103, 101, 99, 97, 96, 94,
    93, 91, 90, 89, 88, 86, 85, 84,
    83, 82, 81, 81, 80, 79, 78, 78,
    77, 76, 76, 75, 75, 74, 74, 73,
    73, 72, 72, 71, 71, 71, 70, 70,
    70, 70, 69, 69, 69, 69, 68, 68,
    68, 68, 68, 67, 67, 67, 67, 67,
  };
  switch (mode) {
    case V_PRED:
      return weights1d[i * size_scale];
    case H_PRED:
      return weights1d[j * size_scale];
    case D63_PRED:
    case D117_PRED:
      return (weights1d[i * size_scale] * 3 + weights1d[j * size_scale]) >> 2;
    case D207_PRED:
    case D153_PRED:
      return (weights1d[j * size_scale] * 3 + weights1d[i * size_scale]) >> 2;
    case D135_PRED:
      return weights1d[(i < j ? i : j) * size_scale];
    case D45_PRED:
      return (weights1d[i * size_scale] + weights1d[j * size_scale]) >> 1;
    case TM_PRED:
    case DC_PRED:
    default:
      return -1;  // no positional weighting: use the plain average
  }
}

// High-bitdepth blend of an inter predictor and an intra predictor into
// comppred8.  All three pixel buffers are CONVERT_TO_BYTEPTR-wrapped
// uint16_t buffers.  With CONFIG_WEDGE_PARTITION, a wedge mask may replace
// the mode-dependent distance weighting.
static void combine_interintra_highbd(PREDICTION_MODE mode,
#if CONFIG_WEDGE_PARTITION
                                      int use_wedge_interintra,
                                      int wedge_index,
                                      BLOCK_SIZE bsize,
#endif  // CONFIG_WEDGE_PARTITION
                                      BLOCK_SIZE plane_bsize,
                                      uint8_t *comppred8,
                                      int compstride,
                                      uint8_t *interpred8,
                                      int interstride,
                                      uint8_t *intrapred8,
                                      int intrastride, int bd) {
  static const int scale_bits = 8;
  static const int scale_max = 256;
  static const int scale_round = 127;
  const int bw = 4 << b_width_log2_lookup[plane_bsize];
  const int bh = 4 << b_height_log2_lookup[plane_bsize];
  const int size = MAX(bw, bh);
  // Step used to spread the 64-entry weight table over smaller blocks.
  const int size_scale = (size >= 64 ? 1 :
                          size == 32 ? 2 :
                          size == 16 ? 4 :
                          size == 8 ? 8 : 16);
  uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
  uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
  uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
  int r, c;
  (void) bd;
#if CONFIG_WEDGE_PARTITION
  if (use_wedge_interintra && get_wedge_bits(bsize)) {
    // Wedge path: blend with a per-pixel mask instead of mode weights.
    uint8_t mask[4096];
    vp9_generate_masked_weight_interintra(wedge_index, bsize, bh, bw, mask, bw);
    for (r = 0; r < bh; ++r) {
      for (c = 0; c < bw; ++c) {
        int m = mask[r * bw + c];
        comppred[r * compstride + c] =
            (intrapred[r * intrastride + c] * m +
             interpred[r * interstride + c] * ((1 << WEDGE_WEIGHT_BITS) - m) +
             (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS;
      }
    }
    return;
  }
#endif  // CONFIG_WEDGE_PARTITION
  for (r = 0; r < bh; ++r) {
    for (c = 0; c < bw; ++c) {
      const int scale = interintra_hbd_scale(mode, r, c, size_scale);
      if (scale < 0) {
        // TM_PRED / DC_PRED / default: simple average.
        comppred[r * compstride + c] = (interpred[r * interstride + c] +
                                        intrapred[r * intrastride + c]) >> 1;
      } else {
        comppred[r * compstride + c] =
            ((scale_max - scale) * interpred[r * interstride + c] +
             scale * intrapred[r * intrastride + c] + scale_round)
            >> scale_bits;
      }
    }
  }
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static void build_intra_predictors_for_2nd_block_interintra(
const MACROBLOCKD *xd, const uint8_t *ref,
@ -1906,8 +2053,6 @@ static void build_intra_predictors_for_2nd_block_interintra(
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
vpx_memset(left_col, 129, 64);
// left
if (left_available) {
if (bwltbh) {
@ -1935,6 +2080,8 @@ static void build_intra_predictors_for_2nd_block_interintra(
for (i = 0; i < bs; ++i)
left_col[i] = ref_fi[i * ref_stride_fi - 1];
}
} else {
vpx_memset(left_col, 129, bs);
}
// TODO(hkuang) do not extend 2*bs pixels for all modes.
@ -1944,11 +2091,11 @@ static void build_intra_predictors_for_2nd_block_interintra(
if (bwltbh) {
ref_fi = dst;
ref_stride_fi = dst_stride;
above_row[-1] = left_available ? ref[-ref_stride-1] : 129;
above_row[-1] = left_available ? ref_fi[-ref_stride_fi-1] : 129;
} else {
ref_fi = ref;
ref_stride_fi = ref_stride;
above_row[-1] = ref[-ref_stride-1];
above_row[-1] = ref_fi[-ref_stride_fi-1];
}
above_ref = ref_fi - ref_stride_fi;
if (xd->mb_to_right_edge < 0) {
@ -2009,6 +2156,161 @@ static void build_intra_predictors_for_2nd_block_interintra(
}
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth intra prediction for the SECOND square sub-block of a
// rectangular interintra block (the companion of
// build_intra_predictors_for_interintra_highbd).  ref8/dst8 are
// CONVERT_TO_BYTEPTR-wrapped uint16_t buffers.  bwltbh ("block width less
// than block height") is 1 when the second block sits below the first and 0
// when it sits to its right; it selects, per edge, whether the neighboring
// samples come from the reconstructed frame (ref) or from the first
// sub-block's prediction (dst).  (x, y) is the sub-block offset used for
// frame-border clamping; bd is the bit depth.
static void build_intra_predictors_for_2nd_block_interintra_highbd(
    const MACROBLOCKD *xd, const uint8_t *ref8,
    int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, TX_SIZE tx_size,
    int up_available, int left_available,
    int right_available, int bwltbh,
    int x, int y, int plane, int bd) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  // Left-neighbor column and above-neighbor row; above_data leaves 16
  // entries of headroom so above_row[-1] (the corner sample) is valid.
  DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64);
#if CONFIG_TX64X64
  DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 256 + 16);
#else
  DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16);
#endif
  uint16_t *above_row = above_data + 16;
  const uint16_t *const_above_row = above_row;
  const int bs = 4 << tx_size;  // sub-block size in pixels
  int frame_width, frame_height;
  int x0, y0;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // Neutral sample value scaled for the bit depth (128 at 8 bits).
  int base = 128 << (bd - 8);
  // Source of the neighboring samples for the current edge: either the
  // reconstructed frame or the first sub-block's prediction (set below).
  const uint16_t *ref_fi;
  int ref_stride_fi;
  // 127 127 127 .. 127 127 127 127 127 127
  // 129 A B .. Y Z
  // 129 C D .. W X
  // 129 E F .. U V
  // 129 G H .. S T T T T T
  // ..
  // Get current frame pointer, width and height.
  if (plane == 0) {
    frame_width = xd->cur_buf->y_width;
    frame_height = xd->cur_buf->y_height;
  } else {
    frame_width = xd->cur_buf->uv_width;
    frame_height = xd->cur_buf->uv_height;
  }
  // Get block position in current frame.
  x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
  y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
  // left
  if (left_available) {
    if (bwltbh) {
      // Second block is below the first: left neighbor is still the
      // reconstructed frame.
      ref_fi = ref;
      ref_stride_fi = ref_stride;
    } else {
      // Second block is to the right: left neighbor is the first
      // sub-block's prediction.
      ref_fi = dst;
      ref_stride_fi = dst_stride;
    }
    if (xd->mb_to_bottom_edge < 0) {
      /* slower path if the block needs border extension */
      if (y0 + bs <= frame_height) {
        for (i = 0; i < bs; ++i)
          left_col[i] = ref_fi[i * ref_stride_fi - 1];
      } else {
        // Replicate the last in-frame sample downward past the border.
        const int extend_bottom = frame_height - y0;
        for (i = 0; i < extend_bottom; ++i)
          left_col[i] = ref_fi[i * ref_stride_fi - 1];
        for (; i < bs; ++i)
          left_col[i] = ref_fi[(extend_bottom - 1) * ref_stride_fi - 1];
      }
    } else {
      /* faster path if the block does not need extension */
      for (i = 0; i < bs; ++i)
        left_col[i] = ref_fi[i * ref_stride_fi - 1];
    }
  } else {
    // No left neighbor: depth-scaled analogue of the 8-bit "129" fill.
    vpx_memset16(left_col, base + 1, bs);
  }
  // TODO(hkuang) do not extend 2*bs pixels for all modes.
  // above
  if (up_available) {
    const uint16_t *above_ref;
    if (bwltbh) {
      // Above neighbor is the first sub-block's prediction.
      ref_fi = dst;
      ref_stride_fi = dst_stride;
      above_row[-1] = left_available ? ref_fi[-ref_stride_fi-1] : (base + 1);
    } else {
      // Above neighbor is the reconstructed frame.
      ref_fi = ref;
      ref_stride_fi = ref_stride;
      above_row[-1] = ref_fi[-ref_stride_fi-1];
    }
    above_ref = ref_fi - ref_stride_fi;
    if (xd->mb_to_right_edge < 0) {
      /* slower path if the block needs border extension */
      if (x0 + 2 * bs <= frame_width) {
        if (right_available && bs == 4) {
          vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t));
        } else {
          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
        }
      } else if (x0 + bs <= frame_width) {
        const int r = frame_width - x0;
        if (right_available && bs == 4) {
          vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
          vpx_memset16(above_row + r, above_row[r - 1],
                       x0 + 2 * bs - frame_width);
        } else {
          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
        }
      } else if (x0 <= frame_width) {
        const int r = frame_width - x0;
        // NOTE(review): the two branches below are identical; confirm
        // whether the right_available path was meant to differ here.
        if (right_available && bs == 4) {
          vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
          vpx_memset16(above_row + r, above_row[r - 1],
                       x0 + 2 * bs - frame_width);
        } else {
          vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
          vpx_memset16(above_row + r, above_row[r - 1],
                       x0 + 2 * bs - frame_width);
        }
      }
      // TODO(Peter) this value should probably change for high bitdepth
      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
    } else {
      /* faster path if the block does not need extension */
      if (bs == 4 && right_available && left_available) {
        // Neighbor row is fully in frame: use it in place, no copy.
        const_above_row = above_ref;
      } else {
        vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
        if (bs == 4 && right_available)
          vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t));
        else
          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
      }
    }
  } else {
    // No above neighbor: depth-scaled analogue of the 8-bit "127" fill.
    vpx_memset16(above_row, base - 1, bs * 2);
    // TODO(Peter): this value should probably change for high bitdepth
    above_row[-1] = base - 1;
  }
  // predict
  if (mode == DC_PRED) {
    dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
                                                        const_above_row,
                                                        left_col, bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, bd);
  }
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Break down rectangular intra prediction for joint spatio-temporal prediction
// into two square intra predictions.
static void build_intra_predictors_for_interintra(
@ -2034,7 +2336,7 @@ static void build_intra_predictors_for_interintra(
0, 0, plane);
build_intra_predictors_for_2nd_block_interintra(
xd, src_bottom, src_stride, pred_ptr_bottom, stride, mode, tx_size,
up_available, left_available, 0,
1, left_available, 0,
1, 0, bw, plane);
} else {
const TX_SIZE tx_size = blocklen_to_txsize(bh);
@ -2046,32 +2348,97 @@ static void build_intra_predictors_for_interintra(
0, 0, plane);
build_intra_predictors_for_2nd_block_interintra(
xd, src_right, src_stride, pred_ptr_right, stride, mode, tx_size,
up_available, left_available, right_available,
up_available, 1, right_available,
0, bh, 0, plane);
}
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth rectangular intra prediction for interintra: a rectangle is
// decomposed into two square predictions (see the 8-bit version above).
// src/pred_ptr are CONVERT_TO_BYTEPTR-wrapped uint16_t buffers.
static void build_intra_predictors_for_interintra_highbd(
    MACROBLOCKD *xd,
    uint8_t *src, int src_stride,
    uint8_t *pred_ptr, int stride,
    PREDICTION_MODE mode,
    int bw, int bh,
    int up_available, int left_available,
    int right_available, int plane) {
  const int bd = xd->bd;
  if (bw < bh) {
    // Tall rectangle: predict the top square from the reconstruction, then
    // the bottom square using the top square as its above neighbor.
    const TX_SIZE tx_size = blocklen_to_txsize(bw);
    uint8_t *const src_bottom = src + bw * src_stride;
    uint8_t *const pred_ptr_bottom = pred_ptr + bw * stride;
    build_intra_predictors_highbd(
        xd, src, src_stride, pred_ptr, stride, mode, tx_size,
        up_available, left_available, right_available,
        0, 0, plane, bd);
    build_intra_predictors_for_2nd_block_interintra_highbd(
        xd, src_bottom, src_stride, pred_ptr_bottom, stride, mode, tx_size,
        1, left_available, 0,
        1, 0, bw, plane, bd);
  } else if (bw > bh) {
    // Wide rectangle: predict the left square, then the right square using
    // the left square as its left neighbor.
    const TX_SIZE tx_size = blocklen_to_txsize(bh);
    uint8_t *const src_right = src + bh;
    uint8_t *const pred_ptr_right = pred_ptr + bh;
    build_intra_predictors_highbd(
        xd, src, src_stride, pred_ptr, stride, mode, tx_size,
        up_available, left_available, 1,
        0, 0, plane, bd);
    build_intra_predictors_for_2nd_block_interintra_highbd(
        xd, src_right, src_stride, pred_ptr_right, stride, mode, tx_size,
        up_available, 1, right_available,
        0, bh, 0, plane, bd);
  } else {
    // Square block: a single full-size intra prediction suffices.
    build_intra_predictors_highbd(xd, src, src_stride, pred_ptr, stride,
                                  mode, blocklen_to_txsize(bw),
                                  up_available, left_available,
                                  right_available, 0, 0, plane, bd);
  }
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Builds the luma interintra predictor: forms an intra prediction from the
// reconstructed neighbors into a local buffer, then blends it with the
// inter predictor `ypred` into xd->plane[0].dst.
// The high-bitdepth branch mirrors the 8-bit branch on uint16_t buffers
// (wrapped with CONVERT_TO_BYTEPTR) and forwards the bit depth.
// NOTE: this span of the rendered diff interleaved the removed 8-bit-only
// lines with the added code (duplicate `intrapredictor` declaration and
// duplicated argument lists), which is not valid C; this is the
// reconstructed post-change function.
void vp9_build_interintra_predictors_sby(MACROBLOCKD *xd,
                                         uint8_t *ypred,
                                         int ystride,
                                         BLOCK_SIZE bsize) {
  int bw = 4 << b_width_log2_lookup[bsize];
  int bh = 4 << b_height_log2_lookup[bsize];
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // 64x64 is the largest luma block handled here -> 4096 samples.
    DECLARE_ALIGNED_ARRAY(16, uint16_t, intrapredictor, 4096);
    build_intra_predictors_for_interintra_highbd(
        xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
        CONVERT_TO_BYTEPTR(intrapredictor), bw,
        xd->mi[0].src_mi->mbmi.interintra_mode, bw, bh,
        xd->up_available, xd->left_available, 0, 0);
    combine_interintra_highbd(xd->mi[0].src_mi->mbmi.interintra_mode,
#if CONFIG_WEDGE_PARTITION
                              xd->mi[0].src_mi->mbmi.use_wedge_interintra,
                              xd->mi[0].src_mi->mbmi.interintra_wedge_index,
                              bsize,
#endif  // CONFIG_WEDGE_PARTITION
                              bsize,
                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                              ypred, ystride,
                              CONVERT_TO_BYTEPTR(intrapredictor), bw, xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  {
    uint8_t intrapredictor[4096];
    build_intra_predictors_for_interintra(
        xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
        intrapredictor, bw,
        xd->mi[0].src_mi->mbmi.interintra_mode, bw, bh,
        xd->up_available, xd->left_available, 0, 0);
    combine_interintra(xd->mi[0].src_mi->mbmi.interintra_mode,
#if CONFIG_WEDGE_PARTITION
                       xd->mi[0].src_mi->mbmi.use_wedge_interintra,
                       xd->mi[0].src_mi->mbmi.interintra_wedge_index,
                       bsize,
#endif  // CONFIG_WEDGE_PARTITION
                       bsize,
                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                       ypred, ystride, intrapredictor, bw);
  }
}
void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
@ -2082,36 +2449,75 @@ void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
BLOCK_SIZE uvbsize = get_plane_block_size(bsize, &xd->plane[1]);
int bw = 4 << b_width_log2_lookup[uvbsize];
int bh = 4 << b_height_log2_lookup[uvbsize];
uint8_t uintrapredictor[4096];
uint8_t vintrapredictor[4096];
build_intra_predictors_for_interintra(
xd, xd->plane[1].dst.buf, xd->plane[1].dst.stride,
uintrapredictor, bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 1);
build_intra_predictors_for_interintra(
xd, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
vintrapredictor, bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 2);
combine_interintra(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED_ARRAY(16, uint16_t, uintrapredictor, 4096);
DECLARE_ALIGNED_ARRAY(16, uint16_t, vintrapredictor, 4096);
build_intra_predictors_for_interintra_highbd(
xd, xd->plane[1].dst.buf, xd->plane[1].dst.stride,
CONVERT_TO_BYTEPTR(uintrapredictor), bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 1);
build_intra_predictors_for_interintra_highbd(
xd, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
CONVERT_TO_BYTEPTR(vintrapredictor), bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 2);
combine_interintra_highbd(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
#if CONFIG_WEDGE_PARTITION
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
#endif // CONFIG_WEDGE_PARTITION
uvbsize,
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
upred, ustride, uintrapredictor, bw);
combine_interintra(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
uvbsize,
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
upred, ustride,
CONVERT_TO_BYTEPTR(uintrapredictor), bw, xd->bd);
combine_interintra_highbd(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
#if CONFIG_WEDGE_PARTITION
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
#endif // CONFIG_WEDGE_PARTITION
uvbsize,
xd->plane[2].dst.buf, xd->plane[2].dst.stride,
vpred, vstride, vintrapredictor, bw);
uvbsize,
xd->plane[2].dst.buf, xd->plane[2].dst.stride,
vpred, vstride,
CONVERT_TO_BYTEPTR(vintrapredictor), bw, xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
{
uint8_t uintrapredictor[4096];
uint8_t vintrapredictor[4096];
build_intra_predictors_for_interintra(
xd, xd->plane[1].dst.buf, xd->plane[1].dst.stride,
uintrapredictor, bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 1);
build_intra_predictors_for_interintra(
xd, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
vintrapredictor, bw,
xd->mi[0].src_mi->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 2);
combine_interintra(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
#if CONFIG_WEDGE_PARTITION
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
#endif // CONFIG_WEDGE_PARTITION
uvbsize,
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
upred, ustride, uintrapredictor, bw);
combine_interintra(xd->mi[0].src_mi->mbmi.interintra_uv_mode,
#if CONFIG_WEDGE_PARTITION
xd->mi[0].src_mi->mbmi.use_wedge_interintra,
xd->mi[0].src_mi->mbmi.interintra_uv_wedge_index,
bsize,
#endif // CONFIG_WEDGE_PARTITION
uvbsize,
xd->plane[2].dst.buf, xd->plane[2].dst.stride,
vpred, vstride, vintrapredictor, bw);
}
}
void vp9_build_interintra_predictors(MACROBLOCKD *xd,

View File

@ -2153,27 +2153,27 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_b_64x64/;
}
if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void vp9_highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_nuq/;
if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void vp9_highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_nuq/;
add_proto qw/void vp9_highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_fp_nuq/;
add_proto qw/void vp9_highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_fp_nuq/;
add_proto qw/void vp9_highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_32x32_nuq/;
add_proto qw/void vp9_highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_32x32_nuq/;
add_proto qw/void vp9_highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_32x32_fp_nuq/;
add_proto qw/void vp9_highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_32x32_fp_nuq/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_64x64_nuq/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_64x64_nuq/;
add_proto qw/void vp9_highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_64x64_fp_nuq/;
}
add_proto qw/void vp9_highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/vp9_highbd_quantize_64x64_fp_nuq/;
}
}

View File

@ -4981,7 +4981,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
#if CONFIG_FILTERINTRA
xd,
#endif
mi);
mi);
vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
#if CONFIG_PALETTE
if (mbmi->palette_enabled[0] && output_enabled) {
@ -5008,7 +5008,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
}

View File

@ -958,7 +958,6 @@ void vp9_highbd_quantize_nuq_c(const tran_low_t *coeff_ptr,
const int16_t *scan,
const uint8_t *band) {
int eob = -1;
(void)iscan;
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
@ -992,7 +991,6 @@ void vp9_highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr,
const int16_t *scan,
const uint8_t *band) {
int eob = -1;
(void)iscan;
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {

View File

@ -3905,6 +3905,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
#endif // CONFIG_VP9_HIGHBITDEPTH
const int tmp_buf_sz = 64 * 64;
int pred_exists = 0;
int intpel_mv;
int64_t rd, tmp_rd, best_rd = INT64_MAX;
@ -4226,7 +4227,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz;
xd->plane[j].dst.stride = 64;
}
}
@ -4429,7 +4430,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_WEDGE_PARTITION
mbmi->ref_frame[1] = NONE;
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz;
xd->plane[j].dst.stride = 64;
}
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
@ -4437,12 +4438,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[1] = INTRA_FRAME;
for (interintra_mode = DC_PRED; interintra_mode <= TM_PRED;
++interintra_mode) {
++interintra_mode) {
mbmi->interintra_mode = interintra_mode;
mbmi->interintra_uv_mode = interintra_mode;
rmode = cpi->mbmode_cost[mbmi->interintra_mode];
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + 64 * 64,
tmp_buf + 2* 64 * 64, 64, 64, 64, bsize);
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz, 64, 64, 64,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_sum, dist_sum);
@ -4458,8 +4460,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
rmode = cpi->mbmode_cost[mbmi->interintra_mode];
if (wedge_bits) {
mbmi->use_wedge_interintra = 0;
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + 64 * 64,
tmp_buf + 2* 64 * 64, 64, 64, 64, bsize);
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz, 64, 64, 64,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL);
rwedge = vp9_cost_bit(cm->fc.wedge_interintra_prob[bsize], 0);
rd = RDCOST(x->rdmult, x->rddiv,
@ -4473,8 +4476,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mbmi->interintra_wedge_index = wedge_index;
mbmi->interintra_uv_wedge_index = wedge_index;
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + 64 * 64,
tmp_buf + 2* 64 * 64, 64, 64, 64, bsize);
vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz,
tmp_buf + 2 * tmp_buf_sz, 64, 64, 64,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL);
rd = RDCOST(x->rdmult, x->rddiv,
rmode + rate_mv_tmp + rwedge + rate_sum, dist_sum);
@ -4565,7 +4569,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (best_needs_copy) {
// again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
xd->plane[i].dst.buf = tmp_buf + i * tmp_buf_sz;
xd->plane[i].dst.stride = 64;
}
}