vpx/vp9/common/vp9_mvref_common.c

219 lines
8.0 KiB
C
Raw Normal View History

/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_mvref_common.h"
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
Try again to merge branch 'frame-parallel' into master branch. In frame parallel decode, libvpx decoder decodes several frames on all cpus in parallel fashion. If not being flushed, it will only return frame when all the cpus are busy. If getting flushed, it will return all the frames in the decoder. Compare with current serial decode mode in which libvpx decoder is idle between decode calls, libvpx decoder is busy between decode calls. Current frame parallel decode will only speed up the decoding for frame parallel encoded videos. For non frame parallel encoded videos, frame parallel decode is slower than serial decode due to lack of loopfilter worker thread. There are still some known issues that need to be addressed. For example: decode frame parallel videos with segmentation enabled is not right sometimes. * frame-parallel: Add error handling for frame parallel decode and unit test for that. Fix a bug in frame parallel decode and add a unit test for that. Add two test vectors to test frame parallel decode. Add key frame seeking to webmdec and webm_video_source. Implement frame parallel decode for VP9. Increase the thread test range to cover 5, 6, 7, 8 threads. Fix a bug in adding frame parallel unit test. Add VP9 frame-parallel unit test. Manually pick "Make the api behavior conform to api spec." from master branch. Move vp9_dec_build_inter_predictors_* to decoder folder. Add segmentation map array for current and last frame segmentation. Include the right header for VP9 worker thread. Move vp9_thread.* to common. ctrl_get_reference does not need user_priv. Seperate the frame buffers from VP9 encoder/decoder structure. 
Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:""" Conflicts: test/codec_factory.h test/decode_test_driver.cc test/decode_test_driver.h test/invalid_file_test.cc test/test-data.sha1 test/test.mk test/test_vectors.cc vp8/vp8_dx_iface.c vp9/common/vp9_alloccommon.c vp9/common/vp9_entropymode.c vp9/common/vp9_loopfilter_thread.c vp9/common/vp9_loopfilter_thread.h vp9/common/vp9_mvref_common.c vp9/common/vp9_onyxc_int.h vp9/common/vp9_reconinter.c vp9/decoder/vp9_decodeframe.c vp9/decoder/vp9_decodeframe.h vp9/decoder/vp9_decodemv.c vp9/decoder/vp9_decoder.c vp9/decoder/vp9_decoder.h vp9/encoder/vp9_encoder.c vp9/encoder/vp9_pickmode.c vp9/encoder/vp9_rdopt.c vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c This reverts commit a18da9760a74d9ce6fb9f875706dc639c95402f5. Change-Id: I361442ffec1586d036ea2e0ee97ce4f077585f02
2015-01-27 21:26:28 +01:00
int block, int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
int different_ref_found = 0;
int context_counter = 0;
const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
// Blank the reference vector list
vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
// The nearest 2 blocks are treated differently
// if the size < 8x8 we get the mv from the bmi substructure,
// and we also need to keep a mode count.
for (i = 0; i < 2; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride].src_mi;
const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
// Keep counts for entropy encoding.
context_counter += mode_2_counter[candidate->mode];
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
refmv_count, mv_ref_list, Done);
else if (candidate->ref_frame[1] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
refmv_count, mv_ref_list, Done);
}
}
// Check the rest of the neighbors in much the same way
// as before except we don't need to keep track of sub blocks or
// mode counts.
for (; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride].src_mi->mbmi;
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
else if (candidate->ref_frame[1] == ref_frame)
ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done);
}
}
Try again to merge branch 'frame-parallel' into master branch. In frame parallel decode, libvpx decoder decodes several frames on all cpus in parallel fashion. If not being flushed, it will only return frame when all the cpus are busy. If getting flushed, it will return all the frames in the decoder. Compare with current serial decode mode in which libvpx decoder is idle between decode calls, libvpx decoder is busy between decode calls. Current frame parallel decode will only speed up the decoding for frame parallel encoded videos. For non frame parallel encoded videos, frame parallel decode is slower than serial decode due to lack of loopfilter worker thread. There are still some known issues that need to be addressed. For example: decode frame parallel videos with segmentation enabled is not right sometimes. * frame-parallel: Add error handling for frame parallel decode and unit test for that. Fix a bug in frame parallel decode and add a unit test for that. Add two test vectors to test frame parallel decode. Add key frame seeking to webmdec and webm_video_source. Implement frame parallel decode for VP9. Increase the thread test range to cover 5, 6, 7, 8 threads. Fix a bug in adding frame parallel unit test. Add VP9 frame-parallel unit test. Manually pick "Make the api behavior conform to api spec." from master branch. Move vp9_dec_build_inter_predictors_* to decoder folder. Add segmentation map array for current and last frame segmentation. Include the right header for VP9 worker thread. Move vp9_thread.* to common. ctrl_get_reference does not need user_priv. Seperate the frame buffers from VP9 encoder/decoder structure. 
Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:""" Conflicts: test/codec_factory.h test/decode_test_driver.cc test/decode_test_driver.h test/invalid_file_test.cc test/test-data.sha1 test/test.mk test/test_vectors.cc vp8/vp8_dx_iface.c vp9/common/vp9_alloccommon.c vp9/common/vp9_entropymode.c vp9/common/vp9_loopfilter_thread.c vp9/common/vp9_loopfilter_thread.h vp9/common/vp9_mvref_common.c vp9/common/vp9_onyxc_int.h vp9/common/vp9_reconinter.c vp9/decoder/vp9_decodeframe.c vp9/decoder/vp9_decodeframe.h vp9/decoder/vp9_decodemv.c vp9/decoder/vp9_decoder.c vp9/decoder/vp9_decoder.h vp9/encoder/vp9_encoder.c vp9/encoder/vp9_pickmode.c vp9/encoder/vp9_rdopt.c vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c This reverts commit a18da9760a74d9ce6fb9f875706dc639c95402f5. Change-Id: I361442ffec1586d036ea2e0ee97ce4f077585f02
2015-01-27 21:26:28 +01:00
// Synchronize here for frame parallel decode if sync function is provided.
if (sync != NULL) {
sync(data, mi_row);
}
// Check the last frame's mode and mv info.
if (cm->use_prev_frame_mvs) {
if (prev_frame_mvs->ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
} else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
}
}
// Since we couldn't find 2 mvs from the same reference frame
// go back through the neighbors and find motion vectors from
// different reference frames.
if (different_ref_found) {
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
* xd->mi_stride].src_mi->mbmi;
// If the candidate is INTRA we don't want to consider its mv.
IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
refmv_count, mv_ref_list, Done);
}
}
}
// Since we still don't have a candidate we'll try the last frame.
if (cm->use_prev_frame_mvs) {
if (prev_frame_mvs->ref_frame[0] != ref_frame &&
prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
int_mv mv = prev_frame_mvs->mv[0];
if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
ref_sign_bias[ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
}
if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
prev_frame_mvs->ref_frame[1] != ref_frame &&
prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
int_mv mv = prev_frame_mvs->mv[1];
if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
ref_sign_bias[ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done);
}
}
Done:
mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
}
void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
Try again to merge branch 'frame-parallel' into master branch. In frame parallel decode, libvpx decoder decodes several frames on all cpus in parallel fashion. If not being flushed, it will only return frame when all the cpus are busy. If getting flushed, it will return all the frames in the decoder. Compare with current serial decode mode in which libvpx decoder is idle between decode calls, libvpx decoder is busy between decode calls. Current frame parallel decode will only speed up the decoding for frame parallel encoded videos. For non frame parallel encoded videos, frame parallel decode is slower than serial decode due to lack of loopfilter worker thread. There are still some known issues that need to be addressed. For example: decode frame parallel videos with segmentation enabled is not right sometimes. * frame-parallel: Add error handling for frame parallel decode and unit test for that. Fix a bug in frame parallel decode and add a unit test for that. Add two test vectors to test frame parallel decode. Add key frame seeking to webmdec and webm_video_source. Implement frame parallel decode for VP9. Increase the thread test range to cover 5, 6, 7, 8 threads. Fix a bug in adding frame parallel unit test. Add VP9 frame-parallel unit test. Manually pick "Make the api behavior conform to api spec." from master branch. Move vp9_dec_build_inter_predictors_* to decoder folder. Add segmentation map array for current and last frame segmentation. Include the right header for VP9 worker thread. Move vp9_thread.* to common. ctrl_get_reference does not need user_priv. Seperate the frame buffers from VP9 encoder/decoder structure. 
Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:""" Conflicts: test/codec_factory.h test/decode_test_driver.cc test/decode_test_driver.h test/invalid_file_test.cc test/test-data.sha1 test/test.mk test/test_vectors.cc vp8/vp8_dx_iface.c vp9/common/vp9_alloccommon.c vp9/common/vp9_entropymode.c vp9/common/vp9_loopfilter_thread.c vp9/common/vp9_loopfilter_thread.h vp9/common/vp9_mvref_common.c vp9/common/vp9_onyxc_int.h vp9/common/vp9_reconinter.c vp9/decoder/vp9_decodeframe.c vp9/decoder/vp9_decodeframe.h vp9/decoder/vp9_decodemv.c vp9/decoder/vp9_decoder.c vp9/decoder/vp9_decoder.h vp9/encoder/vp9_encoder.c vp9/encoder/vp9_pickmode.c vp9/encoder/vp9_rdopt.c vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c This reverts commit a18da9760a74d9ce6fb9f875706dc639c95402f5. Change-Id: I361442ffec1586d036ea2e0ee97ce4f077585f02
2015-01-27 21:26:28 +01:00
int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data) {
find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
Try again to merge branch 'frame-parallel' into master branch. In frame parallel decode, libvpx decoder decodes several frames on all cpus in parallel fashion. If not being flushed, it will only return frame when all the cpus are busy. If getting flushed, it will return all the frames in the decoder. Compare with current serial decode mode in which libvpx decoder is idle between decode calls, libvpx decoder is busy between decode calls. Current frame parallel decode will only speed up the decoding for frame parallel encoded videos. For non frame parallel encoded videos, frame parallel decode is slower than serial decode due to lack of loopfilter worker thread. There are still some known issues that need to be addressed. For example: decode frame parallel videos with segmentation enabled is not right sometimes. * frame-parallel: Add error handling for frame parallel decode and unit test for that. Fix a bug in frame parallel decode and add a unit test for that. Add two test vectors to test frame parallel decode. Add key frame seeking to webmdec and webm_video_source. Implement frame parallel decode for VP9. Increase the thread test range to cover 5, 6, 7, 8 threads. Fix a bug in adding frame parallel unit test. Add VP9 frame-parallel unit test. Manually pick "Make the api behavior conform to api spec." from master branch. Move vp9_dec_build_inter_predictors_* to decoder folder. Add segmentation map array for current and last frame segmentation. Include the right header for VP9 worker thread. Move vp9_thread.* to common. ctrl_get_reference does not need user_priv. Seperate the frame buffers from VP9 encoder/decoder structure. 
Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:""" Conflicts: test/codec_factory.h test/decode_test_driver.cc test/decode_test_driver.h test/invalid_file_test.cc test/test-data.sha1 test/test.mk test/test_vectors.cc vp8/vp8_dx_iface.c vp9/common/vp9_alloccommon.c vp9/common/vp9_entropymode.c vp9/common/vp9_loopfilter_thread.c vp9/common/vp9_loopfilter_thread.h vp9/common/vp9_mvref_common.c vp9/common/vp9_onyxc_int.h vp9/common/vp9_reconinter.c vp9/decoder/vp9_decodeframe.c vp9/decoder/vp9_decodeframe.h vp9/decoder/vp9_decodemv.c vp9/decoder/vp9_decoder.c vp9/decoder/vp9_decoder.h vp9/encoder/vp9_encoder.c vp9/encoder/vp9_pickmode.c vp9/encoder/vp9_rdopt.c vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c This reverts commit a18da9760a74d9ce6fb9f875706dc639c95402f5. Change-Id: I361442ffec1586d036ea2e0ee97ce4f077585f02
2015-01-27 21:26:28 +01:00
mi_row, mi_col, sync, data);
}
// Drops the lowest precision bit of a motion vector when high precision is
// not in use: each odd component is moved one unit toward zero so that it
// becomes even. The vector is left untouched when allow_hp is set and
// vp9_use_mv_hp() accepts it.
static void lower_mv_precision(MV *mv, int allow_hp) {
  if (allow_hp && vp9_use_mv_hp(mv))
    return;

  if (mv->row & 1) {
    if (mv->row > 0)
      mv->row -= 1;
    else
      mv->row += 1;
  }

  if (mv->col & 1) {
    if (mv->col > 0)
      mv->col -= 1;
    else
      mv->col += 1;
  }
}
// Finalises the candidate list and reports the two best reference mvs.
//
// Every entry of mvlist (MAX_MV_REF_CANDIDATES of them) is first reduced in
// precision according to allow_hp and then clamped with clamp_mv2(); the
// list is modified in place. Afterwards *nearest_mv receives entry 0 and
// *near_mv receives entry 1.
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
                           int_mv *mvlist, int_mv *nearest_mv,
                           int_mv *near_mv) {
  int_mv *cand = mvlist;
  int_mv *const list_end = mvlist + MAX_MV_REF_CANDIDATES;

  // Make sure all the candidates are properly clamped etc
  while (cand < list_end) {
    MV *const vec = &cand->as_mv;
    lower_mv_precision(vec, allow_hp);
    clamp_mv2(vec, xd);
    ++cand;
  }

  *nearest_mv = mvlist[0];
  *near_mv = mvlist[1];
}
// Derives the nearest/near reference mvs for one sub-block of a sub-8x8
// block.
//
// The regular candidate list is built with find_mv_refs_idx() for the
// block's own reference frame (mi->mbmi.ref_frame[ref]). It is then
// combined with the mvs of sub-blocks stored in bmi[]:
//   block 0    : nearest = list[0], near = list[1].
//   block 1, 2 : nearest = bmi[0]; near = first list entry that differs
//                from nearest.
//   block 3    : nearest = bmi[2]; near = first of bmi[1], bmi[0], list[0],
//                list[1] that differs from nearest.
// *near_mv is zeroed up front, so it stays 0 when no differing candidate
// exists.
//
// block       Sub-block index, expected to be 0..3.
// ref         Which of the block's reference frames / mvs to use.
// nearest_mv  Output: nearest reference mv.
// near_mv     Output: near reference mv.
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
                                   const TileInfo *const tile,
                                   int block, int ref, int mi_row, int mi_col,
                                   int_mv *nearest_mv, int_mv *near_mv) {
  int_mv mv_list[MAX_MV_REF_CANDIDATES];
  MODE_INFO *const mi = xd->mi[0].src_mi;
  b_mode_info *bmi = mi->bmi;
  int n;

  // The candidate handling below indexes mv_list[0] and mv_list[1] directly.
  assert(MAX_MV_REF_CANDIDATES == 2);

  // No synchronisation callback is supplied on this path.
  find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
                   mi_row, mi_col, NULL, NULL);

  near_mv->as_int = 0;
  switch (block) {
    case 0:
      nearest_mv->as_int = mv_list[0].as_int;
      near_mv->as_int = mv_list[1].as_int;
      break;
    case 1:
    case 2:
      nearest_mv->as_int = bmi[0].as_mv[ref].as_int;
      for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n)
        if (nearest_mv->as_int != mv_list[n].as_int) {
          near_mv->as_int = mv_list[n].as_int;
          break;
        }
      break;
    case 3: {
      int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
      candidates[0] = bmi[1].as_mv[ref];
      candidates[1] = bmi[0].as_mv[ref];
      candidates[2] = mv_list[0];
      candidates[3] = mv_list[1];

      nearest_mv->as_int = bmi[2].as_mv[ref].as_int;
      for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
        if (nearest_mv->as_int != candidates[n].as_int) {
          near_mv->as_int = candidates[n].as_int;
          break;
        }
      break;
    }
    default:
      // A bare string literal is always non-zero, so it must be combined
      // with 0 for the assertion to actually fire.
      assert(0 && "Invalid block index.");
  }
}