vpx/vp10/common/reconinter.c
Ronald S. Bultje 50f944272c vp10: do sub8x8 block reconstruction in full subblocks.
This means that we don't reconstruct in 4x4 dimensions, but in
blocksize dimensions, e.g. 4x8 or 8x4. This may in some cases lead
to performance improvements. Also, if we decide to re-introduce
scalable coding support, this would fix the fact that you need to
re-scale the MV halfway the block in sub8x8 non-4x4 blocks.

See issue 1013.

Change-Id: If39c890cad20dff96635720d8c75b910cafac495
2015-09-16 19:35:54 -04:00

220 lines
9.1 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "./vpx_scale_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/blockd.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
enum mv_precision precision,
int x, int y, int bd) {
const int is_q4 = precision == MV_PRECISION_Q4;
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
is_q4 ? src_mv->col : src_mv->col * 2 };
MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
const int subpel_x = mv.col & SUBPEL_MASK;
const int subpel_y = mv.row & SUBPEL_MASK;
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
enum mv_precision precision,
int x, int y) {
const int is_q4 = precision == MV_PRECISION_Q4;
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
is_q4 ? src_mv->col : src_mv->col * 2 };
MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
const int subpel_x = mv.col & SUBPEL_MASK;
const int subpel_y = mv.row & SUBPEL_MASK;
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
}
void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int bw, int bh,
int x, int y, int w, int h,
int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
const int is_compound = has_second_ref(&mi->mbmi);
const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter];
int ref;
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
const MV mv = mi->mbmi.sb_type < BLOCK_8X8
? average_split_mvs(pd, mi, ref, block)
: mi->mbmi.mv[ref].as_mv;
// TODO(jkoleszar): This clamping is done in the incorrect place for the
// scaling case. It needs to be done on the scaled MV, not the pre-scaling
// MV. Note however that it performs the subsampling aware scaling so
// that the result is always q4.
// mv_precision precision is MV_PRECISION_Q4.
const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
pd->subsampling_x,
pd->subsampling_y);
uint8_t *pre;
MV32 scaled_mv;
int xs, ys, subpel_x, subpel_y;
const int is_scaled = vp10_is_scaled(sf);
if (is_scaled) {
pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
xs = sf->x_step_q4;
ys = sf->y_step_q4;
} else {
pre = pre_buf->buf + (y * pre_buf->stride + x);
scaled_mv.row = mv_q4.row;
scaled_mv.col = mv_q4.col;
xs = ys = 16;
}
subpel_x = scaled_mv.col & SUBPEL_MASK;
subpel_y = scaled_mv.row & SUBPEL_MASK;
pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+ (scaled_mv.col >> SUBPEL_BITS);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
}
#else
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
int mi_row, int mi_col,
int plane_from, int plane_to) {
int plane;
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
for (plane = plane_from; plane <= plane_to; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = 4 * num_4x4_blocks_wide_lookup[bsize] >> pd->subsampling_x;
const int bh = 4 * num_4x4_blocks_high_lookup[bsize] >> pd->subsampling_y;
if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
const int have_vsplit = bp != PARTITION_HORZ;
const int have_hsplit = bp != PARTITION_VERT;
const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
const int pw = 8 >> (have_vsplit | pd->subsampling_x);
const int ph = 8 >> (have_hsplit | pd->subsampling_y);
int x, y;
assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
assert(bsize == BLOCK_8X8);
assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors(xd, plane, y * 2 + x, bw, bh,
4 * x, 4 * y, pw, ph, mi_x, mi_y);
} else {
build_inter_predictors(xd, plane, 0, bw, bh,
0, 0, bw, bh, mi_x, mi_y);
}
}
}
void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0);
}
void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize, int plane) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane);
}
void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1,
MAX_MB_PLANE - 1);
}
void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
MAX_MB_PLANE - 1);
}
void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col) {
uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
src->v_buffer};
const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
src->uv_stride};
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &planes[i];
setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL,
pd->subsampling_x, pd->subsampling_y);
}
}
void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col,
const struct scale_factors *sf) {
if (src != NULL) {
int i;
uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
src->v_buffer};
const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
src->uv_stride};
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col,
sf, pd->subsampling_x, pd->subsampling_y);
}
}
}