Do sub-pixel motion search in up-sampled reference frames
Up-sample the reference frames to 8 times their original size in each dimension using the 8-tap interpolation filter. In sub-pixel motion search, use the up-sampled reference frames to find the best matching blocks. This substantially improves motion-search precision and, in turn, compression quality. No change is required on the decoder side.

Borg test and speed test results:
1. On the derflr set, overall PSNR gain: 1.306%, SSIM gain: 1.512%. Average speed loss on the derf set was 6.0%.
2. On the stdhd set, overall PSNR gain: 0.754%, SSIM gain: 0.814%. On the hevchd set, overall PSNR gain: 0.465%, SSIM gain: 0.527%. Speed loss on HD clips was 3.5%.

Change-Id: I300ebaafff57e88914f3dedc8784cb21d316b04f
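The core trade here is memory and encoder-side work for exact 1/8-pel sampling: once a reference frame is up-sampled 8x, any eighth-pel motion vector can be evaluated with a direct pixel fetch instead of on-the-fly subpel interpolation. A minimal sketch of that indexing, with hypothetical names (not code from this patch):

#include <stdint.h>

/* (row_q3, col_q3) are in 1/8-pel units. In a plane up-sampled 8x in each
   dimension they are plain integer pixel coordinates, so no subpel filter
   is applied at search time. */
static uint8_t fetch_eighth_pel(const uint8_t *up_ref, int up_stride,
                                int row_q3, int col_q3) {
  return up_ref[row_q3 * up_stride + col_q3];
}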
configure:

@@ -283,6 +283,7 @@ EXPERIMENT_LIST="
     loop_restoration
     ext_partition
     obmc
+    affine_motion
 "
 CONFIG_LIST="
     dependency_tracking
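Since affine_motion lands in EXPERIMENT_LIST, all of the code below is compiled out by default; as with other experiments in this tree, it is presumably switched on at build time with ./configure --enable-experimental --enable-affine-motion.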
@@ -410,6 +410,15 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
   vpx_free(cpi->active_map.map);
   cpi->active_map.map = NULL;
 
+#if CONFIG_AFFINE_MOTION
+  {
+    // Free up-sampled reference buffers.
+    int i;
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+      vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
+  }
+#endif
+
   vp10_free_ref_frame_buffers(cm->buffer_pool);
 #if CONFIG_VP9_POSTPROC
   vp10_free_postproc_buffers(cm);
@@ -744,6 +753,26 @@ static void alloc_util_frame_buffers(VP10_COMP *cpi) {
                                NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate scaled last source buffer");
+
+#if CONFIG_AFFINE_MOTION
+  {
+    // Allocate up-sampled reference buffers.
+    int i;
+
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+      if (vpx_realloc_frame_buffer(&cpi->upsampled_ref_bufs[i].buf,
+                                   (cm->width << 3), (cm->height << 3),
+                                   cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                   cm->use_highbitdepth,
+#endif
+                                   (VP9_ENC_BORDER_IN_PIXELS << 3),
+                                   cm->byte_alignment,
+                                   NULL, NULL, NULL))
+        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate up-sampled reference frame buffer");
+  }
+#endif
 }
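A note on what this allocation costs: each buffer is (width << 3) x (height << 3). For 1080p input the up-sampled luma plane is 15360 x 8640, roughly 127 MiB at 8 bits per sample, and one such buffer is kept per MAX_REF_FRAMES slot; borders, alignment, and the (allocated but unused) chroma planes add more. That footprint, alongside the up-sampling work itself, is the main price of the feature.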
@@ -2353,10 +2382,11 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
-                                   YV12_BUFFER_CONFIG *dst, int bd) {
+                                   YV12_BUFFER_CONFIG *dst, int planes,
+                                   int bd) {
 #else
 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
-                                   YV12_BUFFER_CONFIG *dst) {
+                                   YV12_BUFFER_CONFIG *dst, int planes) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
   const int src_w = src->y_crop_width;
   const int src_h = src->y_crop_height;
@@ -2374,7 +2404,7 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
 
   for (y = 0; y < dst_h; y += 16) {
     for (x = 0; x < dst_w; x += 16) {
-      for (i = 0; i < MAX_MB_PLANE; ++i) {
+      for (i = 0; i < planes; ++i) {
         const int factor = (i == 0 || i == 3 ? 1 : 2);
         const int x_q4 = x * (16 / factor) * src_w / dst_w;
         const int y_q4 = y * (16 / factor) * src_h / dst_h;
@@ -2391,13 +2421,13 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
                                16 / factor, 16 / factor, bd);
         } else {
-          vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+          vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
                         &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
                         &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
                         16 / factor, 16 / factor);
         }
 #else
-        vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+        vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
                       &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
                       &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
                       16 / factor, 16 / factor);
@@ -2406,6 +2436,9 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
       }
     }
 
+  if (planes == 1)
+    vpx_extend_frame_borders_y(dst);
+  else
   vpx_extend_frame_borders(dst);
 }
 
@@ -2462,6 +2495,45 @@ static int recode_loop_test(VP10_COMP *cpi,
   return force_recode;
 }
 
+#if CONFIG_AFFINE_MOTION
+static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
+  int i;
+
+  for (i = 0; i < MAX_REF_FRAMES; i++) {
+    if (!ubufs[i].ref_count) {
+      return i;
+    }
+  }
+  return INVALID_IDX;
+}
+
+// Up-sample reference frames.
+static INLINE int upsample_ref_frame(RefCntBuffer *bufs,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                     EncRefCntBuffer *ubufs, int new_idx,
+                                     int bit_depth) {
+#else
+                                     EncRefCntBuffer *ubufs, int new_idx) {
+#endif
+  int new_uidx = get_free_upsampled_ref_buf(ubufs);
+
+  if (new_uidx == INVALID_IDX) {
+    return INVALID_IDX;
+  } else {
+    const YV12_BUFFER_CONFIG *const ref = &bufs[new_idx].buf;
+    YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf;
+
+    // Currently, only Y plane is up-sampled, U, V are not used.
+#if CONFIG_VP9_HIGHBITDEPTH
+    scale_and_extend_frame(ref, upsampled_ref, 1, bit_depth);
+#else
+    scale_and_extend_frame(ref, upsampled_ref, 1);
+#endif
+    return new_uidx;
+  }
+}
+#endif
+
 void vp10_update_reference_frames(VP10_COMP *cpi) {
   VP10_COMMON * const cm = &cpi->common;
   BufferPool *const pool = cm->buffer_pool;
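get_free_upsampled_ref_buf() makes the small pool self-recycling: a slot is free once its ref_count reaches zero, and upsample_ref_frame() claims a free slot, up-samples only the Y plane (planes == 1, since sub-pixel motion search operates on luma alone), and returns the slot index for the reference-map bookkeeping below.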
@@ -2469,6 +2541,17 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
   int ref_frame;
 #endif  // CONFIG_EXT_REFS
 
+#if CONFIG_AFFINE_MOTION
+  // Always up-sample the current encoded frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+  int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
+                                    cm->new_fb_idx, (int)cm->bit_depth);
+#else
+  int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
+                                    cm->new_fb_idx);
+#endif
+#endif
+
   // At this point the new frame has been encoded.
   // If any buffer copy / swapping is signaled it should be done here.
   if (cm->frame_type == KEY_FRAME) {
@@ -2476,6 +2559,13 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
                &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+
+#if CONFIG_AFFINE_MOTION
+    uref_cnt_fb(cpi->upsampled_ref_bufs,
+                &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+    uref_cnt_fb(cpi->upsampled_ref_bufs,
+                &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
   } else if (vp10_preserve_existing_gf(cpi)) {
     // We have decided to preserve the previously existing golden frame as our
     // new ARF frame. However, in the short term in function
@@ -2489,7 +2579,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
 
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+    uref_cnt_fb(cpi->upsampled_ref_bufs,
+                &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
     tmp = cpi->alt_fb_idx;
     cpi->alt_fb_idx = cpi->gld_fb_idx;
     cpi->gld_fb_idx = tmp;
@@ -2503,6 +2596,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
 
       ref_cnt_fb(pool->frame_bufs,
                  &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+      uref_cnt_fb(cpi->upsampled_ref_bufs,
+                  &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+#endif
       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
              cpi->interp_filter_selected[0],
              sizeof(cpi->interp_filter_selected[0]));
@@ -2511,6 +2608,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
   if (cpi->refresh_golden_frame) {
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+    uref_cnt_fb(cpi->upsampled_ref_bufs,
+                &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+#endif
     if (!cpi->rc.is_src_frame_alt_ref)
       memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
              cpi->interp_filter_selected[0],
@@ -2545,6 +2646,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
   if (cpi->refresh_last_frame) {
     ref_cnt_fb(pool->frame_bufs,
               &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+#if CONFIG_AFFINE_MOTION
+    uref_cnt_fb(cpi->upsampled_ref_bufs,
+                &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+#endif
     if (!cpi->rc.is_src_frame_alt_ref) {
       memcpy(cpi->interp_filter_selected[LAST_FRAME],
              cpi->interp_filter_selected[0],
@@ -2678,7 +2783,8 @@ void vp10_scale_references(VP10_COMP *cpi) {
                                  cm->byte_alignment, NULL, NULL, NULL))
           vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                              "Failed to allocate frame buffer");
-        scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
+        scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE,
+                               (int)cm->bit_depth);
         cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
         alloc_frame_mvs(cm, new_fb);
       }
@@ -2703,11 +2809,39 @@ void vp10_scale_references(VP10_COMP *cpi) {
                                  cm->byte_alignment, NULL, NULL, NULL))
           vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                              "Failed to allocate frame buffer");
-        scale_and_extend_frame(ref, &new_fb_ptr->buf);
+        scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE);
         cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
         alloc_frame_mvs(cm, new_fb);
       }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_AFFINE_MOTION
+      {
+        const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+        EncRefCntBuffer *ubuf =
+            &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]];
+
+        if (vpx_realloc_frame_buffer(&ubuf->buf,
+                                     (cm->width << 3), (cm->height << 3),
+                                     cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                     cm->use_highbitdepth,
+#endif
+                                     (VP9_ENC_BORDER_IN_PIXELS << 3),
+                                     cm->byte_alignment,
+                                     NULL, NULL, NULL))
+          vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                             "Failed to allocate up-sampled frame buffer");
+#if CONFIG_VP9_HIGHBITDEPTH
+        scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE,
+                               (int)cm->bit_depth);
+#else
+        scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE);
+#endif
+        cpi->scaled_ref_idx[ref_frame - LAST_FRAME] = new_fb;
+        alloc_frame_mvs(cm, new_fb);
+      }
+#endif
     } else {
       const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
       RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
@@ -3787,6 +3921,17 @@ static void init_ref_frame_bufs(VP10_COMMON *cm) {
   }
 }
 
+#if CONFIG_AFFINE_MOTION
+static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) {
+  int i;
+
+  for (i = 0; i < MAX_REF_FRAMES; ++i) {
+    cpi->upsampled_ref_bufs[i].ref_count = 0;
+    cpi->upsampled_ref_idx[i] = INVALID_IDX;
+  }
+}
+#endif
+
 static void check_initial_width(VP10_COMP *cpi,
 #if CONFIG_VP9_HIGHBITDEPTH
                                 int use_highbitdepth,
@@ -3809,7 +3954,9 @@ static void check_initial_width(VP10_COMP *cpi,
     alloc_raw_frame_buffers(cpi);
     init_ref_frame_bufs(cm);
     alloc_util_frame_buffers(cpi);
+#if CONFIG_AFFINE_MOTION
+    init_upsampled_ref_frame_bufs(cpi);
+#endif
    init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
 
     cpi->initial_width = cm->width;
@@ -286,6 +286,13 @@ typedef struct IMAGE_STAT {
   double worst;
 } ImageStat;
 
+#if CONFIG_AFFINE_MOTION
+typedef struct {
+  int ref_count;
+  YV12_BUFFER_CONFIG buf;
+} EncRefCntBuffer;
+#endif
+
 typedef struct VP10_COMP {
   QUANTS quants;
   ThreadData td;
@@ -304,6 +311,12 @@ typedef struct VP10_COMP {
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
 
+#if CONFIG_AFFINE_MOTION
+  // Up-sampled reference buffers
+  EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
+  int upsampled_ref_idx[MAX_REF_FRAMES];
+#endif
+
   TileDataEnc *tile_data;
   int allocated_tiles;  // Keep track of memory allocated for tiles.
 
@@ -692,4 +705,18 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate);
 }  // extern "C"
 #endif
 
+#if CONFIG_AFFINE_MOTION
+// Update up-sampled reference frame index.
+static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
+                               int new_uidx) {
+  const int ref_index = *uidx;
+
+  if (ref_index >= 0 && ubufs[ref_index].ref_count > 0)
+    ubufs[ref_index].ref_count--;
+
+  *uidx = new_uidx;
+  ubufs[new_uidx].ref_count++;
+}
+#endif
+
 #endif  // VP10_ENCODER_ENCODER_H_
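The ref-count discipline for up-sampled buffers mirrors ref_cnt_fb() for ordinary frame buffers. A condensed sketch of the per-frame lifecycle, assuming the non-high-bit-depth signature (the driver shown here is illustrative, not code from the patch):

/* After frame N is encoded, up-sample it once into a free pool slot ... */
int uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
                              cm->new_fb_idx);            /* ref_count == 0 */
/* ... then every reference-map slot refreshed to frame N takes a count. */
uref_cnt_fb(cpi->upsampled_ref_bufs,
            &cpi->upsampled_ref_idx[cpi->lst_fb_idx], uidx);  /* == 1 */
uref_cnt_fb(cpi->upsampled_ref_bufs,
            &cpi->upsampled_ref_idx[cpi->gld_fb_idx], uidx);  /* == 2 */
/* Slots whose previous buffer dropped to zero become reusable next frame. */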
@@ -64,7 +64,11 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
       &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
       cond_cost_list(cpi, cost_list),
       NULL, NULL,
+#if CONFIG_AFFINE_MOTION
+      &distortion, &sse, NULL, 0, 0, 0);
+#else
       &distortion, &sse, NULL, 0, 0);
+#endif
 }
 
 #if CONFIG_EXT_INTER
@@ -208,6 +208,32 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
     v = INT_MAX;                                                       \
   }
 
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+#if CONFIG_AFFINE_MOTION
+static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
+                                  int r, int c) {
+  return &buf[(r) * stride + (c)];
+}
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c)                                         \
+  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
+    thismse = upsampled_pref_error(xd, vfp, z, src_stride,             \
+                                   upre(y, y_stride, r, c), y_stride,  \
+                                   second_pred, w, h, &sse);           \
+    if ((v = MVC(r, c) + thismse) < besterr) {                         \
+      besterr = v;                                                     \
+      br = r;                                                          \
+      bc = c;                                                          \
+      *distortion = thismse;                                           \
+      *sse1 = sse;                                                     \
+    }                                                                  \
+  } else {                                                             \
+    v = INT_MAX;                                                       \
+  }
+#endif
+
 #define FIRST_LEVEL_CHECKS                               \
   {                                                      \
     unsigned int left, right, up, down, diag;            \
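The only real difference between CHECK_BETTER and the new CHECK_BETTER1 is addressing: the candidate (r, c) is already in 1/8-pel units, which in the up-sampled plane is an integer pixel position. For comparison (helper names from the patch; the classic pre() shifts back to full pel and relies on subpel-filtered variance):

/* classic path: full-pel base, then vfp->svf/svaf applies subpel filters */
const uint8_t *p0 = pre(y, y_stride, r, c);   /* &y[(r >> 3) * y_stride + (c >> 3)] */
/* up-sampled path: direct fetch, plain variance via upsampled_pref_error() */
const uint8_t *p1 = upre(y, y_stride, r, c);  /* &y[r * y_stride + c] */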
@@ -276,7 +302,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
 // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
 // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
 // later in the same way.
-#define SECOND_LEVEL_CHECKS_BEST                       \
+#define SECOND_LEVEL_CHECKS_BEST(k)                    \
   {                                                    \
     unsigned int second;                               \
     int br0 = br;                                      \
@@ -287,10 +313,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
     } else if (tr != br && tc == bc) {                 \
       kr = br - tr;                                    \
     }                                                  \
-    CHECK_BETTER(second, br0 + kr, bc0);               \
-    CHECK_BETTER(second, br0, bc0 + kc);               \
+    CHECK_BETTER##k(second, br0 + kr, bc0);            \
+    CHECK_BETTER##k(second, br0, bc0 + kc);            \
     if (br0 != br || bc0 != bc) {                      \
-      CHECK_BETTER(second, br0 + kr, bc0 + kc);        \
+      CHECK_BETTER##k(second, br0 + kr, bc0 + kc);     \
     }                                                  \
   }
 
@@ -412,7 +438,11 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
     int *distortion,
     unsigned int *sse1,
     const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+    int w, int h, int use_upsampled_ref) {
+#else
     int w, int h) {
+#endif
   SETUP_SUBPEL_SEARCH;
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
@@ -425,6 +455,9 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
   (void) allow_hp;
   (void) forced_stop;
   (void) hstep;
+#if CONFIG_AFFINE_MOTION
+  (void) use_upsampled_ref;
+#endif
 
   if (cost_list &&
       cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
@@ -491,8 +524,17 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
     int *distortion,
     unsigned int *sse1,
     const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+    int w, int h,
+    int use_upsampled_ref) {
+#else
     int w, int h) {
+#endif
   SETUP_SUBPEL_SEARCH;
+#if CONFIG_AFFINE_MOTION
+  (void) use_upsampled_ref;
+#endif
+
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
                                w, h, offset, mvjcost, mvcost,
@@ -565,8 +607,16 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
     int *distortion,
     unsigned int *sse1,
     const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+    int w, int h, int use_upsampled_ref) {
+#else
     int w, int h) {
+#endif
   SETUP_SUBPEL_SEARCH;
+#if CONFIG_AFFINE_MOTION
+  (void) use_upsampled_ref;
+#endif
+
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
                                w, h, offset, mvjcost, mvcost,
@@ -655,6 +705,101 @@ static const MV search_step_table[12] = {
     {0, -1}, {0, 1}, {-1, 0}, {1, 0}
 };
 
+#if CONFIG_AFFINE_MOTION
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred,
+                                           const uint8_t *pred8,
+                                           int width, int height,
+                                           const uint8_t *ref8,
+                                           int ref_stride) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      const int tmp = pred[j] + ref[(j << 3)];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+  }
+}
+
+static void highbd_upsampled_pred(uint16_t *comp_pred,
+                                  int width, int height,
+                                  const uint8_t *ref8,
+                                  int ref_stride) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      comp_pred[j] = ref[(j << 3)];
+    }
+    comp_pred += width;
+    ref += stride;
+  }
+}
+#endif
+
+static int upsampled_pref_error(const MACROBLOCKD *xd,
+                                const vp9_variance_fn_ptr_t *vfp,
+                                const uint8_t *const src, const int src_stride,
+                                const uint8_t *const y, int y_stride,
+                                const uint8_t *second_pred,
+                                int w, int h, unsigned int *sse) {
+  unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
+    if (second_pred != NULL)
+      highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
+                                     y_stride);
+    else
+      highbd_upsampled_pred(pred16, w, h, y, y_stride);
+
+    besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
+                      sse);
+  } else {
+    DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+#else
+    DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+    (void) xd;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    if (second_pred != NULL)
+      vpx_comp_avg_upsampled_pred(pred, second_pred, w, h, y,
+                                  y_stride);
+    else
+      vpx_upsampled_pred(pred, w, h, y, y_stride);
+
+    besterr = vfp->vf(pred, w, src, src_stride, sse);
+#if CONFIG_VP9_HIGHBITDEPTH
+  }
+#endif
+  return besterr;
+}
+
+static unsigned int upsampled_setup_center_error(
+    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
+    int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
+    const uint8_t *const src, const int src_stride,
+    const uint8_t *const y, int y_stride, const uint8_t *second_pred,
+    int w, int h, int offset, int *mvjcost, int *mvcost[2],
+    unsigned int *sse1, int *distortion) {
+  unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride,
+                                              y + offset, y_stride, second_pred,
+                                              w, h, sse1);
+  *distortion = besterr;
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+  return besterr;
+}
+#endif
+
 int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                   MV *bestmv, const MV *ref_mv,
                                   int allow_hp,
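upsampled_pref_error() is the workhorse of the new path: it decimates the up-sampled block back to w x h by keeping every 8th sample (vpx_upsampled_pred, or the comp-avg variant when a second predictor is present) into an on-stack scratch block, then reuses the ordinary variance function vfp->vf on it. The DECLARE_ALIGNED(16, ..., pred[64 * 64]) scratch bounds this path to blocks up to 64x64, the largest partition in this configuration.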
@@ -667,14 +812,18 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
     int *distortion,
     unsigned int *sse1,
     const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+    int w, int h, int use_upsampled_ref) {
+#else
     int w, int h) {
+#endif
   const uint8_t *const z = x->plane[0].src.buf;
   const uint8_t *const src_address = z;
   const int src_stride = x->plane[0].src.stride;
   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int besterr = INT_MAX;
   unsigned int sse;
-  int thismse;
+  unsigned int thismse;
   const int y_stride = xd->plane[0].pre[0].stride;
   const int offset = bestmv->row * y_stride + bestmv->col;
   const uint8_t *const y = xd->plane[0].pre[0].buf;
@@ -703,6 +852,15 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
   bestmv->row *= 8;
   bestmv->col *= 8;
 
+#if CONFIG_AFFINE_MOTION
+  // use_upsampled_ref can be 0 or 1
+  if (use_upsampled_ref)
+    besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit,
+                                           vfp, z, src_stride, y, y_stride,
+                                           second_pred, w, h, (offset << 3),
+                                           mvjcost, mvcost, sse1, distortion);
+  else
+#endif
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
                                w, h, offset, mvjcost, mvcost,
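Why the center error passes (offset << 3): offset was computed as bestmv->row * y_stride + bestmv->col with bestmv still in full-pel units, and when use_upsampled_ref is set, the callers in the RD code have already pointed pre[0] at the up-sampled plane, so y_stride is the up-sampled stride. Shifting the linear offset left by 3 therefore rescales both coordinates at once:

(row * y_stride + col) << 3 == (row * 8) * y_stride + (col * 8)

which is the same full-pel position expressed in up-sampled pixels.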
@@ -716,16 +874,29 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
       tr = br + search_step[idx].row;
       tc = bc + search_step[idx].col;
       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-        MV this_mv;
-        this_mv.row = tr;
-        this_mv.col = tc;
+        MV this_mv = {tr, tc};
+
+#if CONFIG_AFFINE_MOTION
+        if (use_upsampled_ref) {
+          const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+          thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+                                         pre_address, y_stride, second_pred,
+                                         w, h, &sse);
+        } else {
+#endif
+          const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
+              (tc >> 3);
         if (second_pred == NULL)
           thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse);
         else
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse, second_pred);
+#if CONFIG_AFFINE_MOTION
+        }
+#endif
 
         cost_array[idx] = thismse +
             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -747,14 +918,29 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
     tc = bc + kc;
     tr = br + kr;
     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-      const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
       MV this_mv = {tr, tc};
+
+#if CONFIG_AFFINE_MOTION
+      if (use_upsampled_ref) {
+        const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+        thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+                                       pre_address, y_stride, second_pred,
+                                       w, h, &sse);
+      } else {
+#endif
+        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
       if (second_pred == NULL)
         thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                            src_address, src_stride, &sse);
       else
         thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                             src_address, src_stride, &sse, second_pred);
+#if CONFIG_AFFINE_MOTION
+      }
+#endif
 
       cost_array[4] = thismse +
           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -776,8 +962,17 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
       bc = tc;
     }
 
-    if (iters_per_step > 1 && best_idx != -1)
-      SECOND_LEVEL_CHECKS_BEST;
+    if (iters_per_step > 1 && best_idx != -1) {
+#if CONFIG_AFFINE_MOTION
+      if (use_upsampled_ref) {
+        SECOND_LEVEL_CHECKS_BEST(1);
+      } else {
+#endif
+        SECOND_LEVEL_CHECKS_BEST(0);
+#if CONFIG_AFFINE_MOTION
+      }
+#endif
+    }
 
     tr = br;
     tc = bc;
@@ -116,7 +116,11 @@ typedef int (fractional_mv_step_fp) (
     int *mvjcost, int *mvcost[2],
     int *distortion, unsigned int *sse1,
     const uint8_t *second_pred,
+#if CONFIG_AFFINE_MOTION
+    int w, int h, int use_upsampled_ref);
+#else
     int w, int h);
+#endif
 
 extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
 extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
@@ -3929,7 +3929,8 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                 int_mv* ref_mv_sub8x8[2],
 #endif
                                 int_mv single_newmv[MAX_REF_FRAMES],
-                                int *rate_mv) {
+                                int *rate_mv,
+                                const int block) {
   const VP10_COMMON *const cm = &cpi->common;
   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -4076,6 +4077,40 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
     if (bestsme < INT_MAX) {
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
+#if CONFIG_AFFINE_MOTION
+      // Use up-sampled reference frames.
+      struct macroblockd_plane *const pd = &xd->plane[0];
+      struct buf_2d backup_pred = pd->pre[0];
+      const YV12_BUFFER_CONFIG *upsampled_ref =
+          get_upsampled_ref(cpi, refs[id]);
+
+      // Set pred for Y plane
+      setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+                       upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+                       NULL, pd->subsampling_x, pd->subsampling_y);
+
+      // If bsize < BLOCK_8X8, adjust pred pointer for this block
+      if (bsize < BLOCK_8X8)
+        pd->pre[0].buf =
+            &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block,
+                            pd->pre[0].stride)) << 3];
+
+      bestsme = cpi->find_fractional_mv_step(
+          x, &tmp_mv,
+          &ref_mv[id].as_mv,
+          cpi->common.allow_high_precision_mv,
+          x->errorperbit,
+          &cpi->fn_ptr[bsize],
+          0, cpi->sf.mv.subpel_iters_per_step,
+          NULL,
+          x->nmvjointcost, x->mvcost,
+          &dis, &sse, second_pred,
+          pw, ph, 1);
+
+      // Restore the reference frames.
+      pd->pre[0] = backup_pred;
+#else
+      (void) block;
       bestsme = cpi->find_fractional_mv_step(
           x, &tmp_mv,
          &ref_mv[id].as_mv,
@@ -4087,6 +4122,7 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
           x->nmvjointcost, x->mvcost,
           &dis, &sse, second_pred,
           pw, ph);
+#endif
     }
 
     // Restore the pointer to the first (possibly scaled) prediction buffer.
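All three call sites in the RD loop use the same swap/restore idiom around find_fractional_mv_step: save the (possibly scaled) prediction buffer, point it at the up-sampled reference, search with use_upsampled_ref == 1, restore. Condensed from the hunks above:

struct buf_2d backup_pred = pd->pre[0];      /* save current Y pred buffer */
setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
                 upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
                 NULL, pd->subsampling_x, pd->subsampling_y);
/* ... cpi->find_fractional_mv_step(..., pw, ph, 1); ... */
pd->pre[0] = backup_pred;                    /* restore for prediction/RD  */

The (mi_row << 3, mi_col << 3) arguments land on the same block origin because every pixel coordinate is 8x larger in the up-sampled plane.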
@@ -4367,6 +4403,43 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
         if (bestsme < INT_MAX) {
           int distortion;
+#if CONFIG_AFFINE_MOTION
+          const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+          const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+          // Use up-sampled reference frames.
+          struct macroblockd_plane *const pd = &xd->plane[0];
+          struct buf_2d backup_pred = pd->pre[0];
+          const YV12_BUFFER_CONFIG *upsampled_ref =
+              get_upsampled_ref(cpi, mbmi->ref_frame[0]);
+
+          // Set pred for Y plane
+          setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+                           upsampled_ref->y_stride,
+                           (mi_row << 3), (mi_col << 3),
+                           NULL, pd->subsampling_x, pd->subsampling_y);
+
+          // adjust pred pointer for this block
+          pd->pre[0].buf =
+              &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
+                              pd->pre[0].stride)) << 3];
+
+          cpi->find_fractional_mv_step(
+              x,
+              new_mv,
+              &bsi->ref_mv[0]->as_mv,
+              cm->allow_high_precision_mv,
+              x->errorperbit, &cpi->fn_ptr[bsize],
+              cpi->sf.mv.subpel_force_stop,
+              cpi->sf.mv.subpel_iters_per_step,
+              cond_cost_list(cpi, cost_list),
+              x->nmvjointcost, x->mvcost,
+              &distortion,
+              &x->pred_sse[mbmi->ref_frame[0]],
+              NULL, pw, ph, 1);
+
+          // Restore the reference frames.
+          pd->pre[0] = backup_pred;
+#else
           cpi->find_fractional_mv_step(
               x,
              new_mv,
@@ -4380,6 +4453,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
               &distortion,
               &x->pred_sse[mbmi->ref_frame[0]],
               NULL, 0, 0);
+#endif
 
           // save motion search result for use in compound prediction
 #if CONFIG_EXT_INTER
@@ -4426,7 +4500,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
 #else
                                     seg_mvs[i],
 #endif  // CONFIG_EXT_INTER
-                                    &rate_mv);
+                                    &rate_mv, i);
 #if CONFIG_EXT_INTER
               compound_seg_newmvs[i][0].as_int =
                   frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
@@ -4975,6 +5049,33 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
 
   if (bestsme < INT_MAX) {
     int dis; /* TODO: use dis in distortion calculation later. */
+#if CONFIG_AFFINE_MOTION
+    const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+    const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+    // Use up-sampled reference frames.
+    struct macroblockd_plane *const pd = &xd->plane[0];
+    struct buf_2d backup_pred = pd->pre[0];
+    const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+
+    // Set pred for Y plane
+    setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+                     upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+                     NULL, pd->subsampling_x, pd->subsampling_y);
+
+    bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
+                                           cm->allow_high_precision_mv,
+                                           x->errorperbit,
+                                           &cpi->fn_ptr[bsize],
+                                           cpi->sf.mv.subpel_force_stop,
+                                           cpi->sf.mv.subpel_iters_per_step,
+                                           cond_cost_list(cpi, cost_list),
+                                           x->nmvjointcost, x->mvcost,
+                                           &dis, &x->pred_sse[ref], NULL,
+                                           pw, ph, 1);
+
+    // Restore the reference frames.
+    pd->pre[0] = backup_pred;
+#else
     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                  cm->allow_high_precision_mv,
                                  x->errorperbit,
@@ -4984,6 +5085,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                  cond_cost_list(cpi, cost_list),
                                  x->nmvjointcost, x->mvcost,
                                  &dis, &x->pred_sse[ref], NULL, 0, 0);
+#endif
   }
   *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -5328,7 +5430,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
     if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
       joint_motion_search(cpi, x, bsize, frame_mv,
-                          mi_row, mi_col, NULL, single_newmv, &rate_mv);
+                          mi_row, mi_col, NULL, single_newmv, &rate_mv, 0);
     } else {
       rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
@@ -5358,7 +5460,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
     if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
       joint_motion_search(cpi, x, bsize, frame_mv,
                           mi_row, mi_col,
-                          single_newmv, &rate_mv);
+                          single_newmv, &rate_mv, 0);
     } else {
       rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
@@ -106,4 +106,20 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
 }  // extern "C"
 #endif
 
+#if CONFIG_AFFINE_MOTION
+static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
+                                                          const int ref) {
+  // Use up-sampled reference frames.
+  int ref_idx = 0;
+  if (ref == LAST_FRAME)
+    ref_idx = cpi->lst_fb_idx;
+  else if (ref == GOLDEN_FRAME)
+    ref_idx = cpi->gld_fb_idx;
+  else if (ref == ALTREF_FRAME)
+    ref_idx = cpi->alt_fb_idx;
+
+  return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
+}
+#endif
+
 #endif  // VP10_ENCODER_RDOPT_H_
@@ -320,7 +320,11 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
       0, mv_sf->subpel_iters_per_step,
       cond_cost_list(cpi, cost_list),
       NULL, NULL,
+#if CONFIG_AFFINE_MOTION
+      &distortion, &sse, NULL, 0, 0, 0);
+#else
       &distortion, &sse, NULL, 0, 0);
+#endif
 
   // Restore input state
   x->plane[0].src = src;
@@ -272,6 +272,41 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
   }
 }
 
+#if CONFIG_AFFINE_MOTION
+// Get pred block from up-sampled reference.
+void vpx_upsampled_pred_c(uint8_t *comp_pred,
+                          int width, int height,
+                          const uint8_t *ref, int ref_stride) {
+  int i, j, k;
+  int stride = ref_stride << 3;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0, k = 0; j < width; j++, k += 8) {
+      comp_pred[j] = ref[k];
+    }
+    comp_pred += width;
+    ref += stride;
+  }
+}
+
+void vpx_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+                                   int width, int height,
+                                   const uint8_t *ref, int ref_stride) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      const int tmp = ref[(j << 3)] + pred[j];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+  }
+}
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_variance64(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
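A standalone sanity check of the decimation rule (hypothetical test, mirrors vpx_upsampled_pred_c above): keeping every 8th sample of an 8x up-sampled plane must return the original full-pel pixels exactly.

#include <stdint.h>
#include <stdio.h>

/* Local copy of the decimation logic from the patch. */
static void upsampled_pred(uint8_t *out, int w, int h,
                           const uint8_t *ref, int ref_stride) {
  int i, j;
  const int stride = ref_stride << 3;  /* step 8 rows of the 8x plane */
  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) out[j] = ref[j << 3];
    out += w;
    ref += stride;
  }
}

int main(void) {
  enum { W = 4, H = 4, UP_STRIDE = W * 8 };
  static uint8_t up[H * 8 * UP_STRIDE];
  uint8_t out[W * H];
  int r, c;
  /* Build an 8x plane whose every sample encodes its full-pel parent. */
  for (r = 0; r < H * 8; r++)
    for (c = 0; c < W * 8; c++)
      up[r * UP_STRIDE + c] = (uint8_t)((r / 8) * W + (c / 8));
  upsampled_pred(out, W, H, up, UP_STRIDE);
  for (r = 0; r < H; r++)
    for (c = 0; c < W; c++)
      if (out[r * W + c] != r * W + c) { puts("mismatch"); return 1; }
  puts("decimation recovers full-pel pixels");
  return 0;
}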
@@ -1464,6 +1464,13 @@ add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int
 
 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
 
+if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
+  add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
+  specialize qw/vpx_upsampled_pred sse2/;
+  add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+  specialize qw/vpx_comp_avg_upsampled_pred sse2/;
+}
+
 #
 # Subpixel Variance
 #
@@ -475,3 +475,232 @@ FNS(ssse3, ssse3);
 #undef FNS
 #undef FN
 #endif  // CONFIG_USE_X86INC
+
+#if CONFIG_AFFINE_MOTION
+void vpx_upsampled_pred_sse2(uint8_t *comp_pred,
+                             int width, int height,
+                             const uint8_t *ref, int ref_stride) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  if (width >= 16) {
+    // read 16 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 16) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+        __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+        __m128i s4 = _mm_loadu_si128((const __m128i *)(ref + 64));
+        __m128i s5 = _mm_loadu_si128((const __m128i *)(ref + 80));
+        __m128i s6 = _mm_loadu_si128((const __m128i *)(ref + 96));
+        __m128i s7 = _mm_loadu_si128((const __m128i *)(ref + 112));
+        __m128i t0, t1, t2, t3;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        t1 = _mm_unpacklo_epi8(s2, s3);
+        s3 = _mm_unpackhi_epi8(s2, s3);
+        t2 = _mm_unpacklo_epi8(s4, s5);
+        s5 = _mm_unpackhi_epi8(s4, s5);
+        t3 = _mm_unpacklo_epi8(s6, s7);
+        s7 = _mm_unpackhi_epi8(s6, s7);
+
+        s0 = _mm_unpacklo_epi8(t0, s1);
+        s2 = _mm_unpacklo_epi8(t1, s3);
+        s4 = _mm_unpacklo_epi8(t2, s5);
+        s6 = _mm_unpacklo_epi8(t3, s7);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+        *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
+        *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(s4);
+        *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(s6);
+
+        comp_pred += 16;
+        ref += 16 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  } else if (width >= 8) {
+    // read 8 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 8) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+        __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+        __m128i t0, t1;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        t1 = _mm_unpacklo_epi8(s2, s3);
+        s3 = _mm_unpackhi_epi8(s2, s3);
+
+        s0 = _mm_unpacklo_epi8(t0, s1);
+        s2 = _mm_unpacklo_epi8(t1, s3);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+        *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(s2);
+        comp_pred += 8;
+        ref += 8 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  } else {
+    // read 4 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 4) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i t0;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        s0 = _mm_unpacklo_epi8(t0, s1);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(s0);
+
+        comp_pred += 4;
+        ref += 4 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  }
+}
+
+void vpx_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred,
+                                      int width, int height,
+                                      const uint8_t *ref, int ref_stride) {
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i one = _mm_set1_epi16(1);
+  int i, j;
+  int stride = ref_stride << 3;
+
+  if (width >= 16) {
+    // read 16 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 16) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+        __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+        __m128i s4 = _mm_loadu_si128((const __m128i *)(ref + 64));
+        __m128i s5 = _mm_loadu_si128((const __m128i *)(ref + 80));
+        __m128i s6 = _mm_loadu_si128((const __m128i *)(ref + 96));
+        __m128i s7 = _mm_loadu_si128((const __m128i *)(ref + 112));
+        __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
+        __m128i p1;
+        __m128i t0, t1, t2, t3;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        t1 = _mm_unpacklo_epi8(s2, s3);
+        s3 = _mm_unpackhi_epi8(s2, s3);
+        t2 = _mm_unpacklo_epi8(s4, s5);
+        s5 = _mm_unpackhi_epi8(s4, s5);
+        t3 = _mm_unpacklo_epi8(s6, s7);
+        s7 = _mm_unpackhi_epi8(s6, s7);
+
+        s0 = _mm_unpacklo_epi8(t0, s1);
+        s2 = _mm_unpacklo_epi8(t1, s3);
+        s4 = _mm_unpacklo_epi8(t2, s5);
+        s6 = _mm_unpacklo_epi8(t3, s7);
+
+        s0 = _mm_unpacklo_epi32(s0, s2);
+        s4 = _mm_unpacklo_epi32(s4, s6);
+        s0 = _mm_unpacklo_epi8(s0, zero);
+        s4 = _mm_unpacklo_epi8(s4, zero);
+
+        p1 = _mm_unpackhi_epi8(p0, zero);
+        p0 = _mm_unpacklo_epi8(p0, zero);
+        p0 = _mm_adds_epu16(s0, p0);
+        p1 = _mm_adds_epu16(s4, p1);
+        p0 = _mm_adds_epu16(p0, one);
+        p1 = _mm_adds_epu16(p1, one);
+
+        p0 = _mm_srli_epi16(p0, 1);
+        p1 = _mm_srli_epi16(p1, 1);
+        p0 = _mm_packus_epi16(p0, p1);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+        p0 = _mm_srli_si128(p0, 4);
+        *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
+        p0 = _mm_srli_si128(p0, 4);
+        *(int *)(comp_pred + 8) = _mm_cvtsi128_si32(p0);
+        p0 = _mm_srli_si128(p0, 4);
+        *(int *)(comp_pred + 12) = _mm_cvtsi128_si32(p0);
+
+        comp_pred += 16;
+        pred += 16;
+        ref += 16 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  } else if (width >= 8) {
+    // read 8 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 8) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i s2 = _mm_loadu_si128((const __m128i *)(ref + 32));
+        __m128i s3 = _mm_loadu_si128((const __m128i *)(ref + 48));
+        __m128i p0 = _mm_loadl_epi64((const __m128i *)pred);
+        __m128i t0, t1;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        t1 = _mm_unpacklo_epi8(s2, s3);
+        s3 = _mm_unpackhi_epi8(s2, s3);
+
+        s0 = _mm_unpacklo_epi8(t0, s1);
+        s2 = _mm_unpacklo_epi8(t1, s3);
+        s0 = _mm_unpacklo_epi32(s0, s2);
+        s0 = _mm_unpacklo_epi8(s0, zero);
+
+        p0 = _mm_unpacklo_epi8(p0, zero);
+        p0 = _mm_adds_epu16(s0, p0);
+        p0 = _mm_adds_epu16(p0, one);
+        p0 = _mm_srli_epi16(p0, 1);
+        p0 = _mm_packus_epi16(p0, zero);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+        p0 = _mm_srli_si128(p0, 4);
+        *(int *)(comp_pred + 4) = _mm_cvtsi128_si32(p0);
+
+        comp_pred += 8;
+        pred += 8;
+        ref += 8 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  } else {
+    // read 4 points at one time
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j+= 4) {
+        __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
+        __m128i s1 = _mm_loadu_si128((const __m128i *)(ref + 16));
+        __m128i p0 = _mm_cvtsi32_si128(*(const uint32_t *)pred);
+        __m128i t0;
+
+        t0 = _mm_unpacklo_epi8(s0, s1);
+        s1 = _mm_unpackhi_epi8(s0, s1);
+        s0 = _mm_unpacklo_epi8(t0, s1);
+        s0 = _mm_unpacklo_epi8(s0, zero);
+
+        p0 = _mm_unpacklo_epi8(p0, zero);
+        p0 = _mm_adds_epu16(s0, p0);
+        p0 = _mm_adds_epu16(p0, one);
+        p0 = _mm_srli_epi16(p0, 1);
+        p0 = _mm_packus_epi16(p0, zero);
+
+        *(int *)comp_pred = _mm_cvtsi128_si32(p0);
+
+        comp_pred += 4;
+        pred += 4;
+        ref += 4 * 8;
+      }
+      ref += stride - (width << 3);
+    }
+  }
+}
+#endif
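Why the unpack cascade works: _mm_unpacklo_epi8 interleaves two registers, and the loads are 16 bytes apart, so after the first round originally 16-apart bytes sit side by side, and after the second round the low lane of each result holds bytes 0, 8, 16, 24 of the 32-byte run. Each 32-bit store therefore emits four decimated pixels. A scalar model of one 16-wide iteration, for illustration only:

#include <stdint.h>

/* Scalar equivalent of one 16-wide SSE2 iteration above: the four 32-bit
   stores write ref[0], ref[8], ..., ref[120], i.e. every 8th byte of the
   128 bytes loaded. */
static void upsampled_pred_16wide_scalar(uint8_t *comp_pred,
                                         const uint8_t *ref) {
  int k;
  for (k = 0; k < 16; ++k)
    comp_pred[k] = ref[k * 8];
}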
@@ -210,6 +210,30 @@ void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) {
   extend_frame(ybf, inner_bw);
 }
 
+void vpx_extend_frame_borders_y_c(YV12_BUFFER_CONFIG *ybf) {
+  int ext_size = ybf->border;
+  assert(ybf->y_height - ybf->y_crop_height < 16);
+  assert(ybf->y_width - ybf->y_crop_width < 16);
+  assert(ybf->y_height - ybf->y_crop_height >= 0);
+  assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    extend_plane_high(ybf->y_buffer, ybf->y_stride,
+                      ybf->y_crop_width, ybf->y_crop_height,
+                      ext_size, ext_size,
+                      ext_size + ybf->y_height - ybf->y_crop_height,
+                      ext_size + ybf->y_width - ybf->y_crop_width);
+    return;
+  }
+#endif
+  extend_plane(ybf->y_buffer, ybf->y_stride,
+               ybf->y_crop_width, ybf->y_crop_height,
+               ext_size, ext_size,
+               ext_size + ybf->y_height - ybf->y_crop_height,
+               ext_size + ybf->y_width - ybf->y_crop_width);
+}
+
 #if CONFIG_VP9_HIGHBITDEPTH
 void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
@@ -28,5 +28,8 @@ if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes"))
 
     add_proto qw/void vpx_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
     specialize qw/vpx_extend_frame_inner_borders dspr2/;
+
+    add_proto qw/void vpx_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
+    specialize qw/vpx_extend_frame_borders_y/;
 }
 1;